加入選擇【白話音】、【文讀音】查音自動化處理功能。

AlanJui · Oct 12, 2024 · a817e2f
1 parent 9e6c9c4
commit a817e2f
Show file tree

Hide file tree

Showing 14 changed files with 536 additions and 22 deletions.
diff --git a/Documents/Sip-Ngoo-Im/README.md b/Documents/Sip-Ngoo-Im/README.md
@@ -311,3 +311,43 @@ UPDATE Han_Ji_Tian
 SET 台語音標拼音 = SUBSTR(台語音標拼音, 2)
 WHERE 台語音標拼音 LIKE 'q%';
 ```
+
+## 台羅音標漢字庫
+
+### 資料表結構（Schema）
+
+```sql
+CREATE TABLE 台羅音標漢字庫 (
+    識別號  INTEGER NOT NULL
+                 UNIQUE,
+    漢字   TEXT,
+    台羅音標 TEXT,
+    常用度  TEXT,
+    摘要說明 TEXT,
+    建立時間 TEXT    DEFAULT (DATETIME('now', 'localtime') ) 
+                 NOT NULL,
+    更新時間 TEXT    NOT NULL
+                 DEFAULT (DATETIME('now', 'localtime') ),
+    PRIMARY KEY (
+        識別號 AUTOINCREMENT
+    )
+);
+```
+
+
+### 資料更新觸發器
+
+```sql
+DROP TRIGGER IF EXISTS 紀錄更新觸發器;
+
+CREATE TRIGGER 紀錄更新觸發器
+AFTER UPDATE ON 台羅音標漢字庫
+FOR EACH ROW
+WHEN NEW.更新時間 = OLD.更新時間
+BEGIN
+    UPDATE 台羅音標漢字庫
+    SET 更新時間 = DATETIME('now', 'localtime')
+    WHERE 識別號 = NEW.識別號;
+END;
+```
+
diff --git a/Nga_Siok_Thong_Sip_Ngoo_Im.db b/Nga_Siok_Thong_Sip_Ngoo_Im.db
diff --git a/Tai_Loo_Han_Ji_Khoo.db b/Tai_Loo_Han_Ji_Khoo.db
diff --git a/a702_查找及填入漢字標音.py b/a702_查找及填入漢字標音.py
@@ -6,6 +6,18 @@
 from p702_Ca_Han_Ji_Thak_Im import ca_han_ji_thak_im
 from p730_Tng_Sing_Bang_Iah import tng_sing_bang_iah
 
+
+# ==========================================================
+# 查詢語音類型，若未設定則預設為文讀音
+# ==========================================================
+def get_sound_type(wb):
+    try:
+        reading_type = wb.names['語音類型'].refers_to_range.value
+    except KeyError:
+        reading_type = "文讀音"
+    return reading_type
+
+
 # 指定虛擬環境的 Python 路徑
 venv_python = os.path.join(".venv", "Scripts", "python.exe") if sys.platform == "win32" else os.path.join(".venv", "bin", "python")
 
@@ -36,10 +48,12 @@
 sheet.range('A1').select()     # 將 A1 儲存格設為作用儲存格
 
 # (2) A731: 自動為漢字查找讀音，並抄寫到漢字的上方(拼音)及下方(注音)。
-ca_han_ji_thak_im(wb, '漢字注音', 'V3')
+# type = '白話音'
+type = get_sound_type(wb) 
+ca_han_ji_thak_im(wb, '漢字注音', 'V3', type)
 
 # (3) A740: 將【漢字注音】工作表的內容，轉成 HTML 網頁檔案。
-tng_sing_bang_iah(wb, '漢字注音', 'V3')
+# tng_sing_bang_iah(wb, '漢字注音', 'V3')
 
 # (4) A750: 將 Tai_Gi_Zu_Im_Bun.xlsx 檔案，依 env 工作表的設定，另存新檔到指定目錄。
 try:

diff --git a/docs/桃花源記_漢字注音.html b/docs/桃花源記_漢字注音.html
diff --git a/mod_台羅音標漢字庫.py b/mod_台羅音標漢字庫.py
@@ -15,20 +15,29 @@ def close_db_connection(conn):
     # 關閉數據庫連接
     conn.close()
 
+
 # ==========================================================
 # 用 `漢字` 查詢《台語音標》的讀音資訊
 # ==========================================================
-def han_ji_ca_piau_im(cursor, han_ji):
+def han_ji_ca_piau_im(cursor, han_ji, reading_type="文讀音"):
     """
     根據漢字查詢其台羅音標及相關讀音資訊，並將台羅音標轉換為台語音標。
     若資料紀錄在`常用度`欄位儲存值為空值(NULL)，則將其視為 0，因此可排在查詢結果的最後。
     
     :param cursor: 數據庫游標
     :param han_ji: 欲查詢的漢字
+    :param reading_type: 查詢的讀音類型，可以是 "文讀音" 或 "白話音"
     :return: 包含讀音資訊的字典列表，包含台語音標、聲母、韻母、聲調。
     """
 
-    query = """
+    if reading_type == "文讀音":
+        reading_condition = "常用度 >= 0.61"
+    elif reading_type == "白話音":
+        reading_condition = "常用度 <= 0.60"
+    else:
+        reading_condition = "1=1"  # 查詢所有
+
+    query = f"""
     SELECT 
         識別號,
         漢字,
@@ -38,14 +47,34 @@ def han_ji_ca_piau_im(cursor, han_ji):
     FROM 
         台羅音標漢字庫
     WHERE 
-        漢字 = ?
+        漢字 = ? AND ({reading_condition})
     ORDER BY 
         COALESCE(常用度, 0) DESC;
     """
 
     cursor.execute(query, (han_ji,))
     results = cursor.fetchall()
 
+    # 如果沒有找到符合條件的讀音，則查詢所有讀音，並選擇常用度最高者
+    if not results:
+        query = """
+        SELECT 
+            識別號,
+            漢字,
+            台羅音標,
+            常用度,
+            摘要說明
+        FROM 
+            台羅音標漢字庫
+        WHERE 
+            漢字 = ?
+        ORDER BY 
+            COALESCE(常用度, 0) DESC
+        LIMIT 1;
+        """
+        cursor.execute(query, (han_ji,))
+        results = cursor.fetchall()
+
     # 定義【台羅音標】到【台語音標】的轉換規則
     tai_luo_to_tai_gi_mapping = {
         'tsh': 'c',
@@ -85,9 +114,9 @@ def han_ji_ca_piau_im(cursor, han_ji):
 # 自「台語音標+」，分析出：聲母、韻母、聲調
 # ==========================================================
 def split_zu_im(zu_im):
-    # 先進行聲母轉換處理
-    zu_im = zu_im.replace("tsh", "c").replace("ch", "c")  # 將 tsh, ch 轉換為 c
-    zu_im = zu_im.replace("ts", "z").replace("c", "z")  # 將 ts, c 轉換為 z
+    # 聲母相容性轉換處理（將 tsh 轉換為 c；將 ts 轉換為 z）
+    zu_im = zu_im.replace("tsh", "c")   # 將 tsh 轉換為 c
+    zu_im = zu_im.replace("ts", "z")    # 將 ts  轉換為 z
 
     # 定義聲母的正規表示式，包括常見的聲母，但不包括 m 和 ng
     siann_bu_pattern = re.compile(r"(b|c|z|g|h|j|kh|k|l|m(?!\d)|ng(?!\d)|n|ph|p|s|th|t|Ø)")

diff --git a/output2/【河洛話注音】working.xlsx b/output2/【河洛話注音】working.xlsx
diff --git a/output2/【河洛話注音】桃花源記.xlsx b/output2/【河洛話注音】桃花源記.xlsx
diff --git a/output2/【河洛話注音】桃花源記【文讀音】.xlsx b/output2/【河洛話注音】桃花源記【文讀音】.xlsx
diff --git a/output2/【河洛話注音】桃花源記【白話音】.xlsx b/output2/【河洛話注音】桃花源記【白話音】.xlsx
diff --git a/output2/【河洛話注音】桃花源記（文讀音）.xlsx b/output2/【河洛話注音】桃花源記（文讀音）.xlsx
diff --git a/output2/【河洛話注音】金剛般若波羅蜜經001。法會因由分第一.xlsx b/output2/【河洛話注音】金剛般若波羅蜜經001。法會因由分第一.xlsx
diff --git a/p702_Ca_Han_Ji_Thak_Im.py b/p702_Ca_Han_Ji_Thak_Im.py
@@ -23,7 +23,7 @@ def is_valid_han_ji(char):
     return char not in punctuation_marks
 
 
-def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3'):
+def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', type="文讀音"):
     # 顯示「已輸入之拼音字母及注音符號」 
     named_range = wb.names['顯示注音輸入']  # 選擇名為 "顯示注音輸入" 的命名範圍# 選擇名為 "顯示注音輸入" 的命名範圍
     named_range.refers_to_range.value = True
@@ -122,8 +122,8 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3'):
                         sheet.range((row - 1, col)).value = lo_ma_im_piau
                         sheet.range((row + 1, col)).value = zu_im_hu_ho
                     else:
-                        # 查找漢字讀音 
-                        result = han_ji_ca_piau_im(cursor, han_ji)
+                        # 查找漢字讀音 (type: 白話音、文讀音)
+                        result = han_ji_ca_piau_im(cursor, han_ji, type) 
 
                         # 取羅馬拼音和台語注音
                         if result:

diff --git a/p711_TL_Tng_Zu_Im.py b/p711_TL_Tng_Zu_Im.py
@@ -56,18 +56,7 @@ def TL_Tng_Zu_Im(siann_bu, un_bu, siann_tiau, cursor):
     # - xform/ㆡ(ㄧ|ㆪ)/ㆢ$1/
     #=======================================================================
 
-    # 取【韻母】的第一個注音符號
-    # first_un_bu_char = zu_im_un_bu[0] if zu_im_un_bu else ''
-    # 
     # 比對聲母是否為 ㄗ、ㄘ、ㄙ、ㆡ，且韻母的第一個符號是 ㄧ 或 ㆪ
-    # if zu_im_siann_bu == 'ㄗ' and (first_un_bu_char == 'ㄧ' or first_un_bu_char == 'ㆪ'):
-    #     zu_im_siann_bu = 'ㄐ'
-    # elif zu_im_siann_bu == 'ㄘ' and (first_un_bu_char == 'ㄧ' or first_un_bu_char == 'ㆪ'):
-    #     zu_im_siann_bu = 'ㄑ'
-    # elif zu_im_siann_bu == 'ㄙ' and (first_un_bu_char == 'ㄧ' or first_un_bu_char == 'ㆪ'):
-    #     zu_im_siann_bu = 'ㄒ'
-    # elif zu_im_siann_bu == 'ㆡ' and (first_un_bu_char == 'ㄧ' or first_un_bu_char == 'ㆪ'):
-    #     zu_im_siann_bu = 'ㆢ'
     if siann_bu == 'z' and (un_bu[0] == 'i' or un_bu == 'inn'):
         zu_im_siann_bu = 'ㄐ'
     elif siann_bu == 'c' and (un_bu[0] == 'i' or un_bu == 'inn'):