Skip to content

Commit

Permalink
最佳化:漢字標音的方式
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Jan 8, 2025
1 parent 359e855 commit 2d86863
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 80 deletions.
32 changes: 32 additions & 0 deletions mod_標音.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,3 +844,35 @@ def han_ji_piau_im_tng_huan(self, piau_im, piau_im_huat, siann_bu, un_bu, tiau_h
# '韻母': zu_im_un_bu,
# '聲調': zu_im_siann_tiau
# }

# # ==========================================================
# # 注音法設定和共用變數
# # ==========================================================
# zu_im_huat_list = {
# "SNI": ["fifteen_yin", "rt", "十五音切語"],
# "TPS": ["Piau_Im", "rt", "方音符號注音"],
# "POJ": ["pin_yin", "rt", "白話字拼音"],
# "TL": ["pin_yin", "rt", "台羅拼音"],
# "BP": ["pin_yin", "rt", "閩拼標音"],
# "TLPA_Plus": ["pin_yin", "rt", "台羅改良式"],
# "DBL": ["Siang_Pai", "rtc", "雙排注音"],
# }


# def choose_piau_im_method(piau_im, zu_im_huat, siann_bu, un_bu, tiau_ho):
# """選擇並執行對應的注音方法"""
# if zu_im_huat == "十五音":
# return piau_im.SNI_piau_im(siann_bu, un_bu, tiau_ho)
# elif zu_im_huat == "白話字":
# return piau_im.POJ_piau_im(siann_bu, un_bu, tiau_ho)
# elif zu_im_huat == "台羅拼音":
# return piau_im.TL_piau_im(siann_bu, un_bu, tiau_ho)
# elif zu_im_huat == "閩拼方案":
# return piau_im.BP_piau_im(siann_bu, un_bu, tiau_ho)
# elif zu_im_huat == "方音符號":
# return piau_im.TPS_piau_im(siann_bu, un_bu, tiau_ho)
# elif zu_im_huat == "台語音標":
# siann = piau_im.Siann_Bu_Dict[siann_bu]["台語音標"] or ""
# un = piau_im.Un_Bu_Dict[un_bu]["台語音標"]
# return f"{siann}{un}{tiau_ho}"
# return ""
Binary file not shown.
105 changes: 41 additions & 64 deletions p702_Ca_Han_Ji_Thak_Im.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,40 +13,9 @@
from mod_標音 import PiauIm
from p740_Phua_Im_Ji import PhuaImJi

# ==========================================================
# Phonetic-annotation method settings and shared variables
# ==========================================================
# Keyed by a short annotation-method code; each value is a three-item list.
# NOTE(review): the third item appears to be the method's Chinese display
# name, and the first two look like a formatter/style key and a ruby-layout
# code ("rt" / "rtc") — confirm against the code that consumes this table.
zu_im_huat_list = {
    "SNI": ["fifteen_yin", "rt", "十五音切語"],
    "TPS": ["Piau_Im", "rt", "方音符號注音"],
    "POJ": ["pin_yin", "rt", "白話字拼音"],
    "TL": ["pin_yin", "rt", "台羅拼音"],
    "BP": ["pin_yin", "rt", "閩拼標音"],
    "TLPA_Plus": ["pin_yin", "rt", "台羅改良式"],
    "DBL": ["Siang_Pai", "rtc", "雙排注音"],
}


def choose_piau_im_method(piau_im, zu_im_huat, siann_bu, un_bu, tiau_ho):
    """Select and run the annotation routine named by ``zu_im_huat``.

    Args:
        piau_im: Object exposing the ``SNI/POJ/TL/BP/TPS_piau_im`` converter
            methods and the ``Siann_Bu_Dict`` / ``Un_Bu_Dict`` lookup tables.
        zu_im_huat: Chinese label of the annotation method to apply.
        siann_bu: Initial; also the key used in ``Siann_Bu_Dict``.
        un_bu: Final; also the key used in ``Un_Bu_Dict``.
        tiau_ho: Tone; appended verbatim in the "台語音標" case.

    Returns:
        The selected converter's return value; for "台語音標" the string
        initial + final + tone built from the lookup tables; ``""`` when
        ``zu_im_huat`` matches none of the known labels.
    """
    # (label, converter method name) pairs, checked in the original order.
    converters = (
        ("十五音", "SNI_piau_im"),
        ("白話字", "POJ_piau_im"),
        ("台羅拼音", "TL_piau_im"),
        ("閩拼方案", "BP_piau_im"),
        ("方音符號", "TPS_piau_im"),
    )
    for label, method_name in converters:
        if zu_im_huat == label:
            # Resolve the method only once its label has matched.
            return getattr(piau_im, method_name)(siann_bu, un_bu, tiau_ho)

    if not (zu_im_huat == "台語音標"):
        return ""

    # A falsy initial (e.g. None) contributes nothing to the result.
    initial = piau_im.Siann_Bu_Dict[siann_bu]["台語音標"] or ""
    final = piau_im.Un_Bu_Dict[un_bu]["台語音標"]
    return f"{initial}{final}{tiau_ho}"

def cu_siann_un_tiau(result, han_ji_khoo, piau_im, piau_im_huat):
"""查詢【漢字庫】取得之【查找結果】,將之切分:聲、韻、調"""

def za_ji_kiat_ko_cut_piau_im(result, han_ji_khoo, piau_im, piau_im_huat):
"""查字結果出標音:查詢【漢字庫】取得之【查找結果】,將之切分:聲、韻、調"""
if han_ji_khoo == "河洛話":
#-----------------------------------------------------------------
# 【白話音】:依《河洛話漢字庫》標注【台語音標】和【方音符號】
Expand All @@ -66,10 +35,9 @@ def cu_siann_un_tiau(result, han_ji_khoo, piau_im, piau_im_huat):
if siann_bu == "" or siann_bu == None:
siann_bu = "ø"


# 將【聲母】、【韻母】、【聲調】,合併成【台語音標】
# tai_gi_im_piau = siann_bu + un_bu + tiau_ho
tai_gi_im_piau = ''.join([siann_bu, un_bu, tiau_ho])
# tai_gi_im_piau = ''.join([siann_bu, un_bu, tiau_ho])
tai_gi_im_piau = siann_bu + un_bu + tiau_ho

# 標音法為:【十五音】或【雅俗通】,且【聲母】為空值,則將【聲母】設為【ø】
if (piau_im_huat == "十五音" or piau_im_huat == "雅俗通") and (siann_bu == "" or siann_bu == None):
Expand All @@ -83,7 +51,9 @@ def cu_siann_un_tiau(result, han_ji_khoo, piau_im, piau_im_huat):
)
return tai_gi_im_piau, han_ji_piau_im


def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話音", han_ji_khoo="河洛話", db_name='Ho_Lok_Ue.db', module_name='mod_河洛話', function_name='han_ji_ca_piau_im'):
"""查漢字讀音:依【漢字】查找【台語音標】,並依指定之【標音方法】輸出【漢字標音】"""
# 初始化 PiauIm 類別,産生標音物件
piau_im = PiauIm(han_ji_khoo=han_ji_khoo)
piau_im_huat = wb.names['標音方法'].refers_to_range.value
Expand Down Expand Up @@ -152,11 +122,15 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
print(f"({row}, {col_name}) = {msg}")
index += 1
continue
else:
han_ji = cell_value

# 自儲存格取出【漢字】
han_ji = cell_value
han_ji_u_piau_im = False

# 依據【人工標音】欄是否有輸入,決定【漢字標音】之處理方式
manual_input = sheet.range((row-2, col)).value
if manual_input: # 若【手動輸入】的欄位有輸入

if manual_input: # 若有人工輸入之處理作業
if '〔' in manual_input and '〕' in manual_input:
# 將人工輸入的〔台語音標〕轉換成【方音符號】
im_piau = manual_input.split('〔')[1].split('〕')[0]
Expand All @@ -168,6 +142,7 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
piau_im_huat=piau_im_huat,
tai_gi_im_piau=tai_gi_im_piau
)
han_ji_u_piau_im = True
elif '【' in manual_input and '】' in manual_input:
# 將人工輸入的【方音符號】轉換成【台語音標】
han_ji_piau_im = manual_input.split('【')[1].split('】')[0]
Expand All @@ -179,30 +154,28 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
tiau=tiau,
cursor=cursor,
)['台語音標']
han_ji_u_piau_im = True
else:
# 將人工輸入,沒有以[中括號]標示的【台語音標】轉換成【方音符號】
im_piau = manual_input
siann, un, tiau = split_tai_gi_im_piau(im_piau)
tai_gi_im_piau = ''.join([siann, un, tiau])
# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tlpa_tng_han_ji_piau_im(
piau_im=piau_im,
piau_im_huat=piau_im_huat,
tai_gi_im_piau=tai_gi_im_piau
# 將人工輸入的【台語音標】,解構為【聲母】、【韻母】、【聲調】
tai_gi_im_piau = manual_input
siann, un, tiau = split_tai_gi_im_piau(tai_gi_im_piau)
# 依指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann,
un,
tiau
)
han_ji_u_piau_im = True

# 將人工輸入的【台語音標】置入【破音字庫】Dict
phua_im_ji.ka_phua_im_ji(han_ji, tai_gi_im_piau)

sheet.range((row - 1, col)).value = tai_gi_im_piau
sheet.range((row + 1, col)).value = han_ji_piau_im
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}] 【{han_ji_piau_im}】")
else:
"""無人工輸入則自動查找"""
else: # 無人工輸入,則自【漢字庫】查找作業
# 查找【破音字庫】,確認是否有此漢字
han_ji_u_piau_im = False
found = phua_im_ji.ca_phua_im_ji(han_ji)
if found: # 若【破音字庫】有此漢字
# 若【破音字庫】有此漢字
if found:
siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(found)
tai_gi_im_piau = siann_bu + un_bu + tiau_ho
han_ji_piau_im = tng_uann_han_ji_piau_im(
Expand All @@ -213,26 +186,30 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
tiau_ho
)
han_ji_u_piau_im = True
else: # 若【破音字庫】無此漢字,則在資料庫中查找
sheet.range((row, col)).font.color = (255, 0, 0) # 將文字顏色設為【紅色】
sheet.range((row, col)).color = (255, 255, 0) # 將底色設為【黄色】
print(f"漢字:【{han_ji}】之注音【{tai_gi_im_piau}】取自【人工注音字典】。")
# 若【破音字庫】無此漢字,則在資料庫中查找
else:
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, hue_im=hue_im)
if not result:
msg = f"【{cell_value}】查無此字!"
msg = f"【{han_ji}】查無此字!"
else:
tai_gi_im_piau, han_ji_piau_im = cu_siann_un_tiau(
# 依【漢字庫】查找結果,輸出【台語音標】和【漢字標音】
tai_gi_im_piau, han_ji_piau_im = za_ji_kiat_ko_cut_piau_im(
result=result,
han_ji_khoo=han_ji_khoo,
piau_im=piau_im,
piau_im_huat=piau_im_huat
)
han_ji_u_piau_im = True
han_ji_u_piau_im = True

if han_ji_u_piau_im:
sheet.range((row - 1, col)).value = tai_gi_im_piau
sheet.range((row + 1, col)).value = han_ji_piau_im
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}] 【{han_ji_piau_im}】")
else:
print(f"({row}, {col_name}) = {msg}")
msg = f"{han_ji}: [{tai_gi_im_piau}] /【{han_ji_piau_im}】"

print(f"({row}, {col_name}) = {msg}")
index += 1

row += 4
Expand Down
32 changes: 16 additions & 16 deletions p740_Phua_Im_Ji.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ class PhuaImJi:
"""

def __init__(self, ji_tian_name="Phua_Im_Ji.json"):
# 破音字典存放漢字與注音
# 【人工注音字典】存放漢字與注音
self.phua_im_ji_tian = {}
self.Ji_Tian_Name = ji_tian_name


def ka_phua_im_ji(self, han_ji, piau_im):
"""
將【漢字】與【注音】加入破音字典
將【漢字】與【注音】加入【人工注音字典】
參數:
- han_ji: str,單一漢字。
Expand All @@ -26,7 +26,7 @@ def ka_phua_im_ji(self, han_ji, piau_im):
if len(han_ji) != 1:
raise ValueError("輸入的 char 必須是一個單一漢字。")
self.phua_im_ji_tian[han_ji] = piau_im
print(f"已將 {han_ji} 的注音「{piau_im}」加入破音字典。")
print(f"漢字:【{han_ji}】之注音【{piau_im}】已加入【人工注音字典】。")


def ca_phua_im_ji(self, han_ji):
Expand All @@ -37,39 +37,39 @@ def ca_phua_im_ji(self, han_ji):
- han_ji: str,單一漢字。
返回:
- str 或 None:若存在於破音字典則返回注音,否則返回 None。
- str 或 None:若存在於【人工注音字典】則返回注音,否則返回 None。
"""
return self.phua_im_ji_tian.get(han_ji, None)


def save_to_file(self):
    """
    Write the manual-annotation dictionary to ``self.Ji_Tian_Name`` as
    plain-text JSON (UTF-8, non-ASCII characters kept as-is, 4-space indent).
    """
    file_path = self.Ji_Tian_Name
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(self.phua_im_ji_tian, f, ensure_ascii=False, indent=4)
    # NOTE(review): two near-identical confirmation messages follow; they
    # look like the old and new wording of one line taken from a diff view —
    # confirm which one should remain.
    print(f"破音字典已儲存至 {file_path}")
    print(f"【人工注音字典】已儲存至 {file_path}")


def load_from_file(self):
    """
    Replace the in-memory manual-annotation dictionary with the contents of
    the UTF-8 JSON file at ``self.Ji_Tian_Name``.
    """
    file_path = self.Ji_Tian_Name
    with open(file_path, 'r', encoding='utf-8') as f:
        self.phua_im_ji_tian = json.load(f)
    # NOTE(review): two near-identical confirmation messages follow; they
    # look like the old and new wording of one line taken from a diff view —
    # confirm which one should remain.
    print(f"已從 {file_path} 載入破音字典")
    print(f"已從 {file_path} 載入【人工注音字典】")


def dump_phua_im_ji_tian(self):
"""
在螢幕上輸出破音字典的內容,以純文字格式顯示。
在螢幕上輸出【人工注音字典】的內容,以純文字格式顯示。
"""
if not self.phua_im_ji_tian:
print("破音字典為空。")
print("【人工注音字典】為空。")
else:
print("破音字典內容如下:")
print("【人工注音字典】內容如下:")
print("{")
for han_ji, piau_im in self.phua_im_ji_tian.items():
print(f" '{han_ji}': '{piau_im}',")
Expand All @@ -78,20 +78,20 @@ def dump_phua_im_ji_tian(self):

def save_to_bin_file(self, file_path):
    """
    Serialize the manual-annotation dictionary to ``file_path`` with pickle.
    """
    with open(file_path, 'wb') as f:
        pickle.dump(self.phua_im_ji_tian, f)
    # NOTE(review): two near-identical confirmation messages follow; they
    # look like the old and new wording of one line taken from a diff view —
    # confirm which one should remain.
    print(f"破音字典已儲存至 {file_path}")
    print(f"【人工注音字典】已儲存至 {file_path}")


def load_from_bin_file(self, file_path):
    """
    Replace the in-memory manual-annotation dictionary with the object
    unpickled from ``file_path``.
    """
    with open(file_path, 'rb') as f:
        # NOTE: pickle.load can execute arbitrary code while unpickling;
        # only open files this program wrote itself.
        self.phua_im_ji_tian = pickle.load(f)
    # NOTE(review): two near-identical confirmation messages follow; they
    # look like the old and new wording of one line taken from a diff view —
    # confirm which one should remain.
    print(f"已從 {file_path} 載入破音字典")
    print(f"已從 {file_path} 載入【人工注音字典】")


# 單元測試
Expand All @@ -116,7 +116,7 @@ def load_from_bin_file(self, file_path):
# 從 JSON 檔案讀取
phua_im_ji.load_from_file()

# 在螢幕上輸出破音字典
# 在螢幕上輸出【人工注音字典】
phua_im_ji.dump_phua_im_ji_tian()

# 查詢注音
Expand Down

0 comments on commit 2d86863

Please sign in to comment.