新增 "a500_予我廣韻標音" 功能

AlanJui · Apr 13, 2024 · 3a0a68e · 3a0a68e
1 parent 96f937a
commit 3a0a68e
Show file tree

Hide file tree

Showing 10 changed files with 339 additions and 3 deletions.
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -71,6 +71,13 @@
             "args": [
                 "德紅"
             ]
+        },
+        {
+            "name": "a500_予我廣韻標音",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "a500_予我廣韻標音.py",
+            "console": "integratedTerminal",
         }
     ]
 }
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,6 +1,6 @@
 {
     "[python]": {
-        "editor.defaultFormatter": "ms-python.autopep8"
+        "editor.defaultFormatter": "ms-python.black-formatter"
     },
     "python.formatting.provider": "none",
     "sqltools.connections": [

diff --git a/Kong_Un_V2.db b/Kong_Un_V2.db
diff --git a/Kong_Un_V2.db-journal b/Kong_Un_V2.db-journal
diff --git a/a500_予我廣韻標音.py b/a500_予我廣韻標音.py
@@ -0,0 +1,116 @@
+#================================================================
+# 《予我廣韻標音》
+# 使用《廣韻》作為漢字標讀音之依據。
+#================================================================
+import getopt
+import os
+import sys
+
+import xlwings as xw
+
+import settings
+from p000_import_source_data import main_run as san_sing_han_ji_tsu_im_paiau
+from p200_Iong_Nga_Siok_Thong_Zu_Im import main_run as hoo_gua_tsu_im
+from p300_Kong_Un_Cha_Ji_Tian import Kong_Un_Piau_Im as tsa_ji_tian_tshue_tsu_im
+
+
+def get_cmd_input(gargv):
+    """Parse the ``-i/--input``, ``-u/--user`` and ``-o/--output`` options.
+
+    Args:
+        gargv: Argument vector laid out like ``sys.argv``; element 0 is the
+            program name and is only used to build the usage line.
+
+    Returns:
+        A dict with the keys ``"input"``, ``"user"`` and ``"output"``. An
+        option that was not supplied maps to ``""``.
+
+    Raises:
+        SystemExit: With status 2 when the options cannot be parsed, and
+            also after printing the usage line for ``-h``/``--help``.
+    """
+    # The body used to read an undefined name ``argv`` instead of the
+    # ``gargv`` parameter, so every call failed with NameError.
+    arg_help = "{0} -i <input> -u <user> -o <output>".format(gargv[0])
+
+    try:
+        opts, _ = getopt.getopt(
+            gargv[1:], "hi:u:o:", ["help", "input=", "user=", "output="]
+        )
+    except getopt.GetoptError as e:
+        # Unknown option or missing option argument: show why, then usage.
+        print(e)
+        print(arg_help)
+        sys.exit(2)
+
+    parsed = {"input": "", "user": "", "output": ""}
+    for opt, arg in opts:
+        if opt in ("-h", "--help"):
+            print(arg_help)  # print the help message
+            sys.exit(2)
+        elif opt in ("-i", "--input"):
+            parsed["input"] = arg
+        elif opt in ("-u", "--user"):
+            parsed["user"] = arg
+        elif opt in ("-o", "--output"):
+            parsed["output"] = arg
+
+    print("input:", parsed["input"])
+    print("user:", parsed["user"])
+    print("output:", parsed["output"])
+
+    return parsed
+
+def main():
+    """Run the annotation pipeline on one workbook and save it under its title.
+
+    Steps, in order:
+      1. Pick the workbook: ``settings.get_input_file_path()`` if it returns
+         something truthy, otherwise the ``-i/--input`` command-line option,
+         otherwise the default name ``"Piau-Tsu-Im"``.
+      2. Build the annotation table (``san_sing_han_ji_tsu_im_paiau``).
+      3. Look the characters up in the dictionary
+         (``tsa_ji_tian_tshue_tsu_im``).
+      4. Produce the web page output (``hoo_gua_tsu_im``).
+      5. Report how many characters ended up on the "缺字表" sheet.
+      6. Save the workbook to ``.\\output`` using the title stored in cell C4
+         of the "env" sheet.
+    """
+    # =========================================================================
+    # (1) Work out which workbook is to be annotated.
+    # =========================================================================
+    file_path = settings.get_input_file_path()
+    if file_path:
+        CONVERT_FILE_NAME = file_path
+    else:
+        print("未設定 .env 檔案")
+        opts = get_cmd_input(sys.argv)
+        # Fall back to the default workbook name when -i/--input is absent.
+        CONVERT_FILE_NAME = opts["input"] or "Piau-Tsu-Im"
+    print(f"CONVERT_FILE_NAME = {CONVERT_FILE_NAME}")
+
+    # =========================================================================
+    # (2) Build the annotation table from the article text in the workbook.
+    # =========================================================================
+    san_sing_han_ji_tsu_im_paiau(CONVERT_FILE_NAME)
+
+    # =========================================================================
+    # (3) Look up each character in the dictionary and fill in the table.
+    # =========================================================================
+    tsa_ji_tian_tshue_tsu_im(CONVERT_FILE_NAME)
+
+    # =========================================================================
+    # (4) Turn the annotated table into the HTML page.
+    # =========================================================================
+    hoo_gua_tsu_im(CONVERT_FILE_NAME)
+
+    # One workbook handle serves both the report and the save below.
+    wb = xw.Book(CONVERT_FILE_NAME)
+
+    # ==========================================================
+    # Report the state of the "缺字表" (missing characters) sheet.
+    # ==========================================================
+    khiam_ji_piau = wb.sheets["缺字表"]
+    end_row_no = (
+        khiam_ji_piau.range("A" + str(khiam_ji_piau.cells.last_cell.row))
+        .end("up")
+        .row
+    )
+    # Jumping up from the bottom of column A stops on row 1 both when the
+    # column is empty and when only A1 is filled, so the row number alone
+    # cannot tell "none missing" from "one missing"; check A1 itself.
+    # NOTE(review): assumes the sheet is filled from row 1 downward with one
+    # row per missing character and no header row -- confirm against the
+    # code that writes this sheet.
+    if khiam_ji_piau.range("A1").value is None:
+        khiam_ji_count = 0
+    else:
+        khiam_ji_count = end_row_no
+    if khiam_ji_count > 0:
+        print(f"總計字典查不到注音的漢字共：{khiam_ji_count}個。")
+
+    # =========================================================================
+    # (5) Save a copy named after the article title (cell C4 of "env").
+    # =========================================================================
+    new_file_name = str(wb.sheets["env"].range("C4").value).strip()
+    new_file_path = os.path.join(
+        ".\\output",
+        f"【河洛話注音】{new_file_name}" + ".xlsx",
+    )
+
+    # Write the workbook out under its new name.
+    wb.save(new_file_path)
+
+
+# Run the whole pipeline only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/a800_漢字查廣韻擬音_SQLite.py b/a800_漢字查廣韻擬音_SQLite.py
@@ -10,8 +10,8 @@
 
 import sys
 
-from mod_Query_for_Tshiat_Gu import query_tshiat_gu_siong_ji, query_tshiat_gu_ha_ji
-from mod_huan_tshiat import tshu_tiau, query_tiau_ho
+from mod_huan_tshiat import query_tiau_ho, tshu_tiau
+from mod_Query_for_Tshiat_Gu import query_tshiat_gu_ha_ji, query_tshiat_gu_siong_ji
 from mod_於字典網站查詢漢字之廣韻切語發音 import fetch_kong_un_info
 
 

diff --git a/mod_廣韻.py b/mod_廣韻.py
@@ -251,3 +251,24 @@ def query_table_by_id(cursor, table_name, fields, id):
 
     # 回傳字典列表
     return dict_results
+
+
+# =========================================================
+# 判斷調號
+# =========================================================
+def piau_tiau_ho(ji_tian_piau_im):
+    """Return the tone number for one dictionary record.
+
+    The lookup key is the last character of the record's '清濁' value
+    followed by its '調' value. A combination that is not in the table
+    raises KeyError.
+    """
+    # NOTE(review): '濁上' maps to 2, the same number as '清上' -- this
+    # looks deliberate, but confirm.
+    tone_numbers = {
+        '清平': 1, '清上': 2, '清去': 3, '清入': 4,
+        '濁平': 5, '濁上': 2, '濁去': 7, '濁入': 8,
+    }
+    # Only the final character of the voicing label is part of the key.
+    voicing = ji_tian_piau_im['清濁'][-1]
+    tone_class = ji_tian_piau_im['調']
+    return tone_numbers[f"{voicing}{tone_class}"]
diff --git a/output/Piau-Tsu-Im.xlsx b/output/Piau-Tsu-Im.xlsx
diff --git a/output/【河洛話注音】文章標題.xlsx b/output/【河洛話注音】文章標題.xlsx
diff --git a/p300_Kong_Un_Cha_Ji_Tian.py b/p300_Kong_Un_Cha_Ji_Tian.py
@@ -0,0 +1,192 @@
+import re
+import sqlite3
+
+import xlwings as xw
+
+from mod_廣韻 import han_ji_cha_piau_im, piau_tiau_ho
+
+# 專案全域常數
+# from config_dev_env import DATABASE
+DATABASE = "Kong_Un_V2.db"
+
+
+def _tshu_piau_im(piau_im):
+    """Split one dictionary record into (id, initial, final, tone, reading)."""
+    han_ji_id = piau_im['漢字識別號']
+    # A record whose initial is "Ø" is written out with "q" instead.
+    sing_bu = piau_im['上字標音'] if piau_im['上字標音'] != "Ø" else "q"
+    un_bu = piau_im['下字標音']
+    tiau_ho = piau_tiau_ho(piau_im)
+    return han_ji_id, sing_bu, un_bu, tiau_ho, f"{sing_bu}{un_bu}{tiau_ho}"
+
+
+def Kong_Un_Piau_Im(CONVERT_FILE_NAME):
+    """Fill the "漢字注音表" sheet with readings looked up in the database.
+
+    For each row of column A on the "漢字注音表" sheet (row 1 up to one row
+    before the last used row) the cell text is looked up with
+    ``han_ji_cha_piau_im``:
+
+    * empty cells and text matching the punctuation pattern are skipped;
+    * text with no dictionary record is logged on the "缺字表" sheet
+      (A: running number, B: the text, C: its row on "漢字注音表");
+    * otherwise the first record is written to columns B-F of the same row
+      (reading, initial, final, tone number, number of records found), and
+      when there is more than one record every one of them is also listed
+      on the "字庫表" sheet (columns A-G).
+
+    Args:
+        CONVERT_FILE_NAME: Workbook name or path handed to ``xw.Book``.
+    """
+    wb = xw.Book(CONVERT_FILE_NAME)
+
+    # Source sheet and the number of rows to process.
+    source_sheet = wb.sheets["漢字注音表"]
+    end_of_row_no = (
+        source_sheet.range("A" + str(source_sheet.cells.last_cell.row)).end("up").row
+    )
+    # NOTE(review): the last used row is deliberately left out here;
+    # presumably it holds an end marker -- confirm against the code that
+    # builds this sheet.
+    end_of_row_no = int(end_of_row_no) - 1
+    print(f"end_row = {end_of_row_no}")
+
+    # ==========================================================
+    # Prepare the helper sheets: clear them if they exist, create them
+    # otherwise.
+    # ==========================================================
+    for sheet_name in ("缺字表", "字庫表"):
+        try:
+            # The lookup is inside the try because a missing sheet may
+            # already fail here rather than on select()/clear().
+            sheet = wb.sheets[sheet_name]
+            sheet.select()
+            sheet.clear()
+        except Exception as e:
+            # The exception type depends on the platform backend, so it is
+            # caught broadly and taken to mean "sheet does not exist".
+            print(e)
+            wb.sheets.add(name=sheet_name)
+
+    khiam_ji_piau = wb.sheets["缺字表"]
+    ji_khoo_piau = wb.sheets["字庫表"]
+
+    # Results go back onto the same sheet the characters are read from.
+    han_ji_cu_im_piau = wb.sheets["漢字注音表"]
+    han_ji_cu_im_piau.select()
+
+    # ==========================================================
+    # Punctuation pattern, compiled once instead of on every row.
+    # ==========================================================
+    # NOTE(review): in piau_tiam_2 the sequence `\\u2018` is a literal
+    # backslash followed by the characters u, 2, 0, 1 and the range
+    # `8-\u201D`, which also covers ASCII letters. `\u2018-\u201D` was
+    # probably intended. It is left unchanged because correcting it changes
+    # which cells are skipped and which are logged as missing.
+    piau_tiam_1 = r"[，、：；．。？！（）「」【】《》“]"
+    piau_tiam_2 = r"[\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\\u2018-\u201D\u3000\u3001-\u303F\uFE50-\uFE5E]"  # noqa: E501
+    piau_tiam = re.compile(f"{piau_tiam_1}|{piau_tiam_2}", re.M | re.I)
+
+    # ==========================================================
+    # Database
+    # ==========================================================
+    conn = sqlite3.connect(DATABASE)
+    try:
+        db_cursor = conn.cursor()
+        source_index = 1  # row being read on the source sheet
+        target_index = 1  # row being written on the annotation sheet
+        ji_khoo_index = 1  # next free row on "字庫表"
+        khiam_ji_index = 1  # next free row on "缺字表"
+
+        while source_index <= end_of_row_no:
+            print(f"row = {source_index}")
+            # An empty cell comes back as None; treat it as empty text
+            # instead of turning it into the string "None".
+            cell_value = source_sheet.range("A" + str(source_index)).value
+            beh_piau_im_e_han_ji = (
+                "" if cell_value is None else str(cell_value).strip()
+            )
+
+            # =====================================================
+            # Skip blanks (strip() already removed spaces and line
+            # breaks) and punctuation.
+            # =====================================================
+            if not beh_piau_im_e_han_ji or piau_tiam.search(beh_piau_im_e_han_ji):
+                target_index += 1
+                source_index += 1
+                continue
+
+            # =====================================================
+            # Look the character up in the dictionary database.
+            # =====================================================
+            kong_un_piau_im = han_ji_cha_piau_im(db_cursor, beh_piau_im_e_han_ji)
+
+            # =====================================================
+            # Nothing found: log it on "缺字表".
+            # =====================================================
+            if not kong_un_piau_im:
+                print(f"廣韻字典查不到此漢字：【{beh_piau_im_e_han_ji}】!!")
+                khiam_ji_row = str(khiam_ji_index)
+                # Running number, the text itself, and its source row.
+                khiam_ji_piau.range("A" + khiam_ji_row).value = khiam_ji_index
+                khiam_ji_piau.range("B" + khiam_ji_row).value = beh_piau_im_e_han_ji
+                khiam_ji_piau.range("C" + khiam_ji_row).value = source_index
+                khiam_ji_index += 1
+                target_index += 1
+                source_index += 1
+                continue
+
+            # =====================================================
+            # Write the first record to the annotation sheet.
+            # =====================================================
+            piau_im_tsong_soo = len(kong_un_piau_im)
+            _, sing_bu, un_bu, tiau_ho, cu_im = _tshu_piau_im(kong_un_piau_im[0])
+
+            target_row = str(target_index)
+            han_ji_cu_im_piau.range("B" + target_row).value = cu_im
+            han_ji_cu_im_piau.range("C" + target_row).value = sing_bu
+            han_ji_cu_im_piau.range("D" + target_row).value = un_bu
+            han_ji_cu_im_piau.range("E" + target_row).value = tiau_ho
+            han_ji_cu_im_piau.range("F" + target_row).value = piau_im_tsong_soo
+
+            # =====================================================
+            # More than one record: list all of them on "字庫表".
+            # =====================================================
+            if piau_im_tsong_soo > 1:
+                for piau_im in kong_un_piau_im:
+                    han_ji_id, sing_bu, un_bu, tiau_ho, cu_im = _tshu_piau_im(piau_im)
+                    ji_khoo_row = str(ji_khoo_index)
+
+                    # Row on the annotation sheet this record belongs to.
+                    ji_khoo_piau.range("A" + ji_khoo_row).value = source_index
+                    # Record id taken from the database row.
+                    ji_khoo_piau.range("B" + ji_khoo_row).value = han_ji_id
+                    ji_khoo_piau.range("C" + ji_khoo_row).value = beh_piau_im_e_han_ji
+                    ji_khoo_piau.range("D" + ji_khoo_row).value = cu_im
+                    ji_khoo_piau.range("E" + ji_khoo_row).value = sing_bu
+                    ji_khoo_piau.range("F" + ji_khoo_row).value = un_bu
+                    ji_khoo_piau.range("G" + ji_khoo_row).value = tiau_ho
+
+                    ji_khoo_index += 1
+
+            # =====================================================
+            # Advance the read and write row pointers.
+            # =====================================================
+            target_index += 1
+            source_index += 1
+    finally:
+        # ======================================================
+        # Close the database even when a lookup or a sheet write fails.
+        # ======================================================
+        conn.close()
+
+# Manual entry point: annotate the sample workbook when this module is run
+# directly. The path is written with a Windows-style separator.
+if __name__ == "__main__":
+    CONVERT_FILE_NAME = "output\\Piau-Tsu-Im.xlsx"
+    Kong_Un_Piau_Im(CONVERT_FILE_NAME)