重構專案文檔管理架構；並將查字典找漢字讀音的資料庫改用 SQLite。

AlanJui · Mar 8, 2024 · 6704fbc · 6704fbc
1 parent 9e697db
commit 6704fbc
Show file tree

Hide file tree

Showing 30 changed files with 242 additions and 41 deletions.
diff --git a/Kong_Un.db b/Kong_Un.db
diff --git a/config_dev_env.py b/config_dev_env.py
@@ -20,3 +20,6 @@
 
 # Constants
 WAIT_TIME = 5  # seconds
+
+# Database
+DATABASE_PATH = '.\\Kong_Un.db'
diff --git a/docs/diagrams/example/SDD-001.puml → docs/diagrams/SDD-001_廣韻切語字典資料模型.md b/docs/diagrams/example/SDD-001.puml → docs/diagrams/SDD-001_廣韻切語字典資料模型.md
@@ -1,3 +1,6 @@
+# 廣韻切語字典資料模型
+
+```plantuml
 @startyaml
 #highlight "切語上字"
 #highlight "切語上字" / "漢字"
@@ -22,4 +25,5 @@
 	韻母: 東一
     調: 平
     擬音: ong
-@endyaml
+@endyaml
+```
diff --git a/docs/diagrams/example/a001.md → docs/diagrams/a000_建立注音檔案程式架構.md b/docs/diagrams/example/a001.md → docs/diagrams/a000_建立注音檔案程式架構.md
diff --git a/docs/diagrams/example/a002.md → docs/diagrams/a100_予我漢字注音程式架構.md b/docs/diagrams/example/a002.md → docs/diagrams/a100_予我漢字注音程式架構.md
diff --git a/docs/diagrams/example/a120.md → docs/diagrams/a120.md b/docs/diagrams/example/a120.md → docs/diagrams/a120.md
diff --git a/docs/diagrams/example/a001.puml b/docs/diagrams/example/a001.puml
diff --git a/docs/diagrams/m100_建立標注音檔案.md b/docs/diagrams/m100_建立標注音檔案.md
@@ -0,0 +1,26 @@
+# m100 建立標注音檔案
+
+```plantuml
+@startuml
+start
+
+partition 新建活頁簿檔案 {
+    :建立一個 Excel 活頁簿檔案;
+    :將預設工作表的 A 欄寬度設為 128;
+    :繼上，設定 A 欄所有儲存格，皆可自動換行;
+}
+
+' 自 output 子目錄、env.xlsx 活頁簿檔案，抄錄 env 工作表;
+partition 複製模版工作表 {
+    :打開模版活頁簿檔案 ./output/env.xlsx;
+    :指定此活頁簿檔案為來源檔案;
+    :將來源檔案的工作表複製到新建活頁簿檔案中;
+}
+
+partition 存檔處理 {
+    :將新建之活頁簿檔案，以 "Piau-Tsu-Im.xlsx" 為檔名儲存;
+}
+
+stop
+@enduml
+```
diff --git a/docs/diagrams/m300_查字典標注音.md b/docs/diagrams/m300_查字典標注音.md
@@ -0,0 +1,2 @@
+# m300 查字典標注音
+
diff --git a/docs/diagrams/example/C4-Model/c4-001.puml → libs/diagrams/example/C4-Model/c4-001.puml b/docs/diagrams/example/C4-Model/c4-001.puml → libs/diagrams/example/C4-Model/c4-001.puml
diff --git a/docs/diagrams/example/C4-Model/c4-002.puml → libs/diagrams/example/C4-Model/c4-002.puml b/docs/diagrams/example/C4-Model/c4-002.puml → libs/diagrams/example/C4-Model/c4-002.puml
diff --git a/docs/diagrams/example/C4-Model/c4-003.puml → libs/diagrams/example/C4-Model/c4-003.puml b/docs/diagrams/example/C4-Model/c4-003.puml → libs/diagrams/example/C4-Model/c4-003.puml
diff --git a/docs/diagrams/example/C4-Model/c4-004.puml → libs/diagrams/example/C4-Model/c4-004.puml b/docs/diagrams/example/C4-Model/c4-004.puml → libs/diagrams/example/C4-Model/c4-004.puml
diff --git a/docs/diagrams/example/C4-Model/c4-005.puml → libs/diagrams/example/C4-Model/c4-005.puml b/docs/diagrams/example/C4-Model/c4-005.puml → libs/diagrams/example/C4-Model/c4-005.puml
diff --git a/docs/diagrams/example/C4-Model/c4-006.puml → libs/diagrams/example/C4-Model/c4-006.puml b/docs/diagrams/example/C4-Model/c4-006.puml → libs/diagrams/example/C4-Model/c4-006.puml
diff --git a/docs/diagrams/example/C4-Model/c4-007.puml → libs/diagrams/example/C4-Model/c4-007.puml b/docs/diagrams/example/C4-Model/c4-007.puml → libs/diagrams/example/C4-Model/c4-007.puml
diff --git a/...rams/example/C4-Model/tree-table-001.puml → ...rams/example/C4-Model/tree-table-001.puml b/...rams/example/C4-Model/tree-table-001.puml → ...rams/example/C4-Model/tree-table-001.puml
diff --git a/...rams/example/C4-Model/wirefirmae-002.puml → ...rams/example/C4-Model/wirefirmae-002.puml b/...rams/example/C4-Model/wirefirmae-002.puml → ...rams/example/C4-Model/wirefirmae-002.puml
diff --git a/...grams/example/C4-Model/wireframe-001.puml → ...grams/example/C4-Model/wireframe-001.puml b/...grams/example/C4-Model/wireframe-001.puml → ...grams/example/C4-Model/wireframe-001.puml
diff --git a/docs/diagrams/example/class.puml → libs/diagrams/example/class.puml b/docs/diagrams/example/class.puml → libs/diagrams/example/class.puml
diff --git a/docs/diagrams/example/flow_chart.puml → libs/diagrams/example/flow_chart.puml b/docs/diagrams/example/flow_chart.puml → libs/diagrams/example/flow_chart.puml
diff --git a/docs/diagrams/example/flow_chart_001.puml → libs/diagrams/example/flow_chart_001.puml b/docs/diagrams/example/flow_chart_001.puml → libs/diagrams/example/flow_chart_001.puml
diff --git a/...iagrams/example/sequence_diagram_001.puml → ...iagrams/example/sequence_diagram_001.puml b/...iagrams/example/sequence_diagram_001.puml → ...iagrams/example/sequence_diagram_001.puml
diff --git a/docs/diagrams/example/yaml-001.puml → libs/diagrams/example/yaml-001.puml b/docs/diagrams/example/yaml-001.puml → libs/diagrams/example/yaml-001.puml
diff --git a/docs/SDD_001.md → libs/diagrams/program_flow_chart.md b/docs/SDD_001.md → libs/diagrams/program_flow_chart.md
@@ -1,3 +1,5 @@
+
+```plantuml
 @startuml
 start
 :Let entSet be a set of Entitlements to revoke;
@@ -36,6 +38,5 @@ entitlements of entSet entitlements;;
 Consumers that have an entitlement in entSet;
 stop
 @enduml
-
-plantuml
+```
 
diff --git a/output/Piau-Tsu-Im.xlsx b/output/Piau-Tsu-Im.xlsx
diff --git a/output/【河洛話注音】《水龍吟·登建康賞心亭》.xlsx b/output/【河洛話注音】《水龍吟·登建康賞心亭》.xlsx
diff --git a/output/【河洛話注音】水龍吟·登建康賞心亭.xlsx b/output/【河洛話注音】水龍吟·登建康賞心亭.xlsx
diff --git a/p100_tsa_ji_tian.py b/p100_tsa_ji_tian.py
@@ -1,8 +1,10 @@
 import re
 
-import psycopg2
+import sqlite3
 import xlwings as xw
 
+# 專案全域常數
+from config_dev_env import DATABASE_PATH
 
 def main_run(CONVERT_FILE_NAME):
     # ==========================================================
@@ -59,10 +61,7 @@ def main_run(CONVERT_FILE_NAME):
     # =========================================================="
     # 資料庫",
     # =========================================================="
-    conn = psycopg2.connect(
-        # database="alanjui", user="alanjui", host="127.0.0.1", port="5432"
-        database="Ho_Lok_Ue", user="postgres", password="ChingHai99@", host="127.0.0.1", port="5432"
-    )
+    conn = sqlite3.connect(DATABASE_PATH)
     db_cursor = conn.cursor()
     source_index = 1  # index for source sheet
     target_index = 1
@@ -110,8 +109,8 @@ def main_run(CONVERT_FILE_NAME):
         #           from han_ji
         #           where han_ji='{search_han_ji}'
         sql = (
-            "SELECT id, han_ji, chu_im, freq, siann, un, tiau "
-            "FROM han_ji_dict "
+            "SELECT id, han_ji, tl_im, freq, siann, un, tiau "
+            "FROM Sip_Ngoo_Im_Han_Ji_Tian "
             f"WHERE han_ji='{beh_tshue_tsu_im_e_ji}' "
             "ORDER BY freq DESC;"
         )

diff --git a/p100_tsa_ji_tian_postgres.py b/p100_tsa_ji_tian_postgres.py
@@ -0,0 +1,197 @@
+import re
+
+import psycopg2
+import xlwings as xw
+
+
+def main_run(CONVERT_FILE_NAME):
+    """Look up the reading of every character in column A and record it.
+
+    Opens the workbook ``CONVERT_FILE_NAME`` with xlwings, walks column A of
+    the "漢字注音表" sheet and queries the ``han_ji_dict`` table of the
+    PostgreSQL database for each character.  Rows come back ordered by
+    ``freq DESC``; the first one is written to columns B-G of the same row
+    (reading, initial, final, tone, number of readings, frequency).
+    Characters with no reading are listed on the "缺字表" sheet, and every
+    reading of a character that has more than one is listed on the
+    "字庫表" sheet.
+
+    Args:
+        CONVERT_FILE_NAME: Path of the Excel workbook to process.
+
+    Returns:
+        None.  Results are written into the open workbook; the workbook is
+        neither saved nor closed here.
+    """
+    wb = xw.Book(CONVERT_FILE_NAME)
+
+    # The sheet that is both read (column A) and annotated (columns B-G).
+    han_ji_tsu_im_piau = wb.sheets["漢字注音表"]
+
+    # Row of the last used cell in column A, minus one.
+    # NOTE(review): the last used row is excluded, exactly as before this
+    # change -- confirm that it is a terminator row and not real data.
+    last_used_row = (
+        han_ji_tsu_im_piau.range(
+            "A" + str(han_ji_tsu_im_piau.cells.last_cell.row)
+        )
+        .end("up")
+        .row
+    )
+    end_of_row_no = int(last_used_row) - 1
+    print(f"end_row = {end_of_row_no}")
+
+    # ----------------------------------------------------------
+    # Prepare the work sheets: clear them when they already exist,
+    # otherwise create them.  The lookup is inside the ``try`` so that a
+    # backend which raises on the lookup itself (rather than on first use)
+    # also reaches the "create the sheet" branch.
+    # ----------------------------------------------------------
+    for sheet_name in ("缺字表", "字庫表"):
+        try:
+            sheet = wb.sheets[sheet_name]
+            sheet.select()
+            sheet.clear()
+        except Exception as e:
+            # The exception type depends on the xlwings backend, hence the
+            # broad catch; it is taken to mean "sheet does not exist".
+            print(e)
+            wb.sheets.add(name=sheet_name)
+
+    khiam_ji_piau = wb.sheets["缺字表"]
+    ji_khoo_piau = wb.sheets["字庫表"]
+    han_ji_tsu_im_piau.select()
+
+    # Punctuation is skipped rather than looked up.  Compiled once, outside
+    # the loop.  The second class used to contain ``\\u2018``, which in a raw
+    # string is a literal backslash followed by the range ``8``..U+201D and
+    # therefore swallowed ASCII letters; it is now the intended quote range.
+    piau_tiam_1 = r"[，、：；．。？！（）「」【】《》“]"
+    piau_tiam_2 = r"[\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F\uFE50-\uFE5E]"  # noqa: E501
+    piau_tiam = re.compile(f"{piau_tiam_1}|{piau_tiam_2}")
+
+    # The character is passed as a bound parameter, never interpolated into
+    # the statement, so a quote in a cell cannot alter the query.
+    sql = (
+        "SELECT id, han_ji, chu_im, freq, siann, un, tiau "
+        "FROM han_ji_dict "
+        "WHERE han_ji=%s "
+        "ORDER BY freq DESC;"
+    )
+
+    # NOTE(review): the credentials are hard-coded in source control; move
+    # them to configuration or environment variables.
+    conn = psycopg2.connect(
+        database="Ho_Lok_Ue", user="postgres", password="ChingHai99@", host="127.0.0.1", port="5432"
+    )
+    try:
+        db_cursor = conn.cursor()
+        ji_khoo_index = 1  # next free row on "字庫表"
+        khiam_ji_index = 1  # next free row on "缺字表"
+
+        # Source and target are the same row of the same sheet, so a single
+        # row counter replaces the former source/target index pair.
+        for row_no in range(1, end_of_row_no + 1):
+            print(f"row = {row_no}")
+
+            # An empty cell is reported as None; treat it as blank instead
+            # of looking up the literal text "None".  Cells holding only
+            # whitespace or a line break are blank after strip().
+            cell_value = han_ji_tsu_im_piau.range("A" + str(row_no)).value
+            beh_tshue_tsu_im_e_ji = (
+                "" if cell_value is None else str(cell_value).strip()
+            )
+            if not beh_tshue_tsu_im_e_ji:
+                continue
+
+            # Punctuation marks have no reading: move on to the next row.
+            if piau_tiam.search(beh_tshue_tsu_im_e_ji):
+                continue
+
+            # Fetch every reading of the character, most frequent first.
+            db_cursor.execute(sql, (beh_tshue_tsu_im_e_ji,))
+            ji_e_piau_im = db_cursor.fetchall()
+
+            # No reading found: log sequence number, character and its row
+            # on the missing-character sheet.
+            if not ji_e_piau_im:
+                print(f"Can not find 【{beh_tshue_tsu_im_e_ji}】in Han-Ji-Khoo!!")
+                missing_record = (khiam_ji_index, beh_tshue_tsu_im_e_ji, row_no)
+                for column, value in zip("ABC", missing_record):
+                    khiam_ji_piau.range(column + str(khiam_ji_index)).value = value
+                khiam_ji_index += 1
+                continue
+
+            # Annotate the row with the first (most frequent) reading.
+            piau_im_tsong_soo = len(ji_e_piau_im)
+            _, _, tsu_im, freq, siann_bu, un_bu, tiau_ho = ji_e_piau_im[0]
+            annotation = (tsu_im, siann_bu, un_bu, tiau_ho, piau_im_tsong_soo, freq)
+            for column, value in zip("BCDEFG", annotation):
+                han_ji_tsu_im_piau.range(column + str(row_no)).value = value
+
+            # More than one reading: list all of them on the dictionary
+            # sheet, each tagged with the originating row and the record id.
+            if piau_im_tsong_soo > 1:
+                for han_ji_id, _, tsu_im, freq, siann_bu, un_bu, tiau_ho in ji_e_piau_im:
+                    reading_record = (
+                        row_no,
+                        han_ji_id,
+                        beh_tshue_tsu_im_e_ji,
+                        tsu_im,
+                        siann_bu,
+                        un_bu,
+                        tiau_ho,
+                        freq,
+                    )
+                    for column, value in zip("ABCDEFGH", reading_record):
+                        ji_khoo_piau.range(column + str(ji_khoo_index)).value = value
+                    ji_khoo_index += 1
+    finally:
+        # Close the database connection even when a lookup or a sheet write
+        # fails part-way through.
+        conn.close()