Skip to content

Commit

Permalink
變更漢字標音的格式及方法
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Jan 7, 2025
1 parent 4498231 commit 359e855
Show file tree
Hide file tree
Showing 14 changed files with 352 additions and 73 deletions.
7 changes: 7 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
// 如需詳細資訊,請瀏覽: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "p740_Phua_Im_Ji",
"type": "debugpy",
"request": "launch",
"program": "p740_Phua_Im_Ji.py",
"console": "integratedTerminal",
},
{
"name": "csv01_更新漢字表資料",
"type": "debugpy",
Expand Down
Binary file modified Ho_Lok_Ue.db
Binary file not shown.
4 changes: 4 additions & 0 deletions Phua_Im_Ji.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"行": "ㄒㄧㄥˊ",
"重": "ㄓㄨㄥˋ"
}
4 changes: 4 additions & 0 deletions a701_作業中活頁檔填入漢字.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import xlwings as xw

from mod_file_access import get_han_ji_khoo, get_sound_type, save_as_new_file
from p701_Clear_Cells import clear_han_ji_kap_piau_im
from p702_Ca_Han_Ji_Thak_Im import ca_han_ji_thak_im
from p709_reset_han_ji_cells import reset_han_ji_cells
from p710_thiam_han_ji import fill_hanji_in_cells
Expand Down Expand Up @@ -37,6 +38,9 @@
print("無法執行,可能原因:(1) 未指定輸入檔案;(2) 未找到作用中的 Excel 工作簿")
sys.exit(2)

# 將儲存格已填入之漢字及標音清除
clear_han_ji_kap_piau_im(wb)

# 將待注音的【漢字儲存格】,文字顏色重設為黑色(自動 RGB: 0, 0, 0);填滿顏色重設為無填滿
reset_han_ji_cells(wb)

Expand Down
4 changes: 4 additions & 0 deletions custom_pronunciation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"行": "ㄒㄧㄥˊ",
"重": "ㄓㄨㄥˋ"
}
Binary file added data.pkl
Binary file not shown.
91 changes: 91 additions & 0 deletions docs/《般若波羅蜜多心經》_十五音.html

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions mod_標音.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def split_hong_im_hu_ho(hong_im_hu_ho):
return [sheng_mu, yun_mu, str(tiau_hao)]


def choose_piau_im_method(piau_im, zu_im_huat, siann_bu, un_bu, tiau_ho):
def tng_uann_han_ji_piau_im(piau_im, zu_im_huat, siann_bu, un_bu, tiau_ho):
"""選擇並執行對應的注音方法"""
if zu_im_huat == "雅俗通":
return piau_im.NST_piau_im(siann_bu, un_bu, tiau_ho)
Expand Down Expand Up @@ -169,7 +169,7 @@ def tlpa_tng_han_ji_piau_im(piau_im, piau_im_huat, tai_gi_im_piau):
if siann_bu == "" or siann_bu == None:
siann_bu = "Ø"

han_ji_piau_im = choose_piau_im_method(
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann_bu,
Expand All @@ -188,7 +188,7 @@ def is_punctuation(char):
return False

# 可以根據需要擴充此列表以判斷各種標點符號
punctuation_marks = ",。!?;:、()「」『』《》……"
punctuation_marks = ",。!?;:、()「」『』《》……"
return char in punctuation_marks


Expand Down
Binary file not shown.
20 changes: 18 additions & 2 deletions p701_Clear_Cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,21 @@
import xlwings as xw


def clear_hanji_in_cells(wb, sheet_name='漢字注音', cell='V3'):
def clear_han_ji_kap_piau_im(wb, sheet_name='漢字注音'):
    """Clear the previously filled Han characters and annotations on a sheet.

    Activates the worksheet named ``sheet_name``, selects cell A1, then clears
    the contents of the range ``D3:R<last_row>``. ``last_row`` is computed from
    the value of the workbook's defined name ``每頁總列數``.

    Args:
        wb: Workbook object exposing ``sheets`` and ``names``
            (presumably an xlwings ``Book`` -- confirm against callers).
        sheet_name: Name of the worksheet to clear.
    """
    # presumably each text line occupies 4 worksheet rows and the grid
    # starts below 2 header rows -- TODO confirm against the template
    rows_per_line = 4
    header_offset = 2

    target_sheet = wb.sheets[sheet_name]
    target_sheet.activate()             # make this sheet the active one
    target_sheet.range('A1').select()   # park the cursor on A1

    line_count = wb.names['每頁總列數'].refers_to_range.value
    last_row = int(line_count * rows_per_line + header_offset)

    # Only cell contents in D3:R<last_row> are cleared.
    target_sheet.range(f'D3:R{last_row}').clear_contents()



def clear_hanji_in_cells(wb, sheet_name='漢字注音', source_cell='V3', clear_source=False):
# 選擇指定的工作表
sheet = wb.sheets[sheet_name]

Expand Down Expand Up @@ -45,4 +59,6 @@ def clear_hanji_in_cells(wb, sheet_name='漢字注音', cell='V3'):
# =========================================================================
# (2) 清除原先已填入的漢字
# =========================================================================
sheet.range("V3").value = ""
if clear_source:
sheet.range(source_cell).value = ""
print(f"清空原先的漢字:{source_cell}")
135 changes: 73 additions & 62 deletions p702_Ca_Han_Ji_Thak_Im.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from mod_標音 import split_hong_im_hu_ho # 分解漢字標音
from mod_標音 import split_tai_gi_im_piau # 分解台語音標
from mod_標音 import tlpa_tng_han_ji_piau_im # 台語音標轉漢字標音
from mod_標音 import tng_uann_han_ji_piau_im # 台語音標轉台語音標
from mod_標音 import PiauIm
from p740_Phua_Im_Ji import PhuaImJi

# ==========================================================
# 注音法設定和共用變數
Expand Down Expand Up @@ -43,12 +45,50 @@ def choose_piau_im_method(piau_im, zu_im_huat, siann_bu, un_bu, tiau_ho):
return f"{siann}{un}{tiau_ho}"
return ""

def cu_siann_un_tiau(result, han_ji_khoo, piau_im, piau_im_huat):
    """Split a dictionary lookup result into initial, final and tone.

    Takes the rows returned by the Han-character dictionary lookup, derives
    the initial (siann_bu), final (un_bu) and tone (tiau_ho) from the first
    row, and produces both the joined phonetic string and the annotation
    rendered by ``tng_uann_han_ji_piau_im`` for the requested method.

    Args:
        result: Non-empty sequence of lookup rows; only ``result[0]`` is read.
            For the "河洛話" dictionary the row must provide the keys
            '聲母', '韻母' and '聲調'; otherwise it must provide '標音'.
        han_ji_khoo: Dictionary name; "河洛話" selects the pre-split columns,
            any other value selects the '標音' column.
        piau_im: Annotation object forwarded to ``tng_uann_han_ji_piau_im``.
        piau_im_huat: Name of the annotation method.

    Returns:
        tuple: ``(tai_gi_im_piau, han_ji_piau_im)``.
    """
    first_row = result[0]

    if han_ji_khoo == "河洛話":
        # The row already carries initial / final / tone as separate columns.
        siann_bu = first_row['聲母']
        un_bu = first_row['韻母']
        tiau_ho = first_row['聲調']
        if tiau_ho == "6":
            # Tone 6 is rewritten as tone 7.
            tiau_ho = "7"
        if siann_bu is None:
            # A missing initial must not reach ''.join() below, which would
            # raise TypeError before the empty-initial handling can run.
            siann_bu = ""
    else:
        # The row carries one combined phonetic string; split it.
        siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(first_row['標音'])
        if siann_bu is None or siann_bu == "":
            siann_bu = "ø"

    # Join initial, final and tone into the phonetic string.
    tai_gi_im_piau = ''.join([siann_bu, un_bu, tiau_ho])

    # For the "十五音" and "雅俗通" methods an empty initial is passed as "ø".
    if piau_im_huat in ("十五音", "雅俗通") and siann_bu == "":
        siann_bu = "ø"

    han_ji_piau_im = tng_uann_han_ji_piau_im(
        piau_im,
        piau_im_huat,
        siann_bu,
        un_bu,
        tiau_ho
    )
    return tai_gi_im_piau, han_ji_piau_im

def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話音", han_ji_khoo="河洛話", db_name='Ho_Lok_Ue.db', module_name='mod_河洛話', function_name='han_ji_ca_piau_im'):
# 初始化 PiauIm 類別,産生標音物件
piau_im = PiauIm(han_ji_khoo=han_ji_khoo)
piau_im_huat = wb.names['標音方法'].refers_to_range.value
# piau_im_huat = '方音符號'
phua_im_ji = PhuaImJi()

# 顯示「已輸入之拼音字母及注音符號」
named_range = wb.names['顯示注音輸入']
Expand Down Expand Up @@ -116,7 +156,7 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
han_ji = cell_value

manual_input = sheet.range((row-2, col)).value
if manual_input:
if manual_input: # 若【手動輸入】的欄位有輸入
if '〔' in manual_input and '〕' in manual_input:
# 將人工輸入的〔台語音標〕轉換成【方音符號】
im_piau = manual_input.split('〔')[1].split('〕')[0]
Expand Down Expand Up @@ -151,76 +191,47 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
tai_gi_im_piau=tai_gi_im_piau
)

# 將人工輸入的【台語音標】置入【破音字庫】Dict
phua_im_ji.ka_phua_im_ji(han_ji, tai_gi_im_piau)

sheet.range((row - 1, col)).value = tai_gi_im_piau
sheet.range((row + 1, col)).value = han_ji_piau_im
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}] 【{han_ji_piau_im}】")
else:
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, hue_im=hue_im)

if result:
if han_ji_khoo == "河洛話":
#-----------------------------------------------------------------
# 【白話音】:依《河洛話漢字庫》標注【台語音標】和【方音符號】
#-----------------------------------------------------------------
# 將【台語音標】分解為【聲母】、【韻母】、【聲調】
siann_bu = result[0]['聲母']
un_bu = result[0]['韻母']
tiau_ho = result[0]['聲調']
# if siann_bu == "" or siann_bu == None:
# siann_bu = "Ø"

if tiau_ho == "6":
# 若【聲調】為【6】,則將【聲調】改為【7】
tiau_ho = "7"

# 將【聲母】、【韻母】、【聲調】,合併成【台語音標】
# tai_gi_im_piau = siann_bu + un_bu + tiau_ho
tai_gi_im_piau = ''.join([siann_bu, un_bu, tiau_ho])

# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
# han_ji_piau_im = tlpa_tng_han_ji_piau_im(
# piau_im=piau_im,
# piau_im_huat=piau_im_huat,
# tai_gi_im_piau=tai_gi_im_piau
# )

zu_im_list = split_tai_gi_im_piau(tai_gi_im_piau)
if zu_im_list[0] == "" or zu_im_list[0] == None:
siann_bu = "Ø"
else:
siann_bu = zu_im_list[0]

han_ji_piau_im = choose_piau_im_method(
piau_im,
piau_im_huat,
siann_bu,
zu_im_list[1],
zu_im_list[2]
)
"""無人工輸入則自動查找"""
# 查找【破音字庫】,確認是否有此漢字
han_ji_u_piau_im = False
found = phua_im_ji.ca_phua_im_ji(han_ji)
if found: # 若【破音字庫】有此漢字
siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(found)
tai_gi_im_piau = siann_bu + un_bu + tiau_ho
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann_bu,
un_bu,
tiau_ho
)
han_ji_u_piau_im = True
else: # 若【破音字庫】無此漢字,則在資料庫中查找
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, hue_im=hue_im)
if not result:
msg = f"【{cell_value}】查無此字!"
else:
#-----------------------------------------------------------------
# 【文讀音】:依《廣韻字庫》標注【台語音標】和【方音符號】
#-----------------------------------------------------------------
siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(result[0]['標音'])

# 將【台語音標】分解為【聲母】、【韻母】、【聲調】
if siann_bu == "" or siann_bu == None:
siann_bu = "Ø"
tai_gi_im_piau = siann_bu + un_bu + tiau_ho

# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tlpa_tng_han_ji_piau_im(
tai_gi_im_piau, han_ji_piau_im = cu_siann_un_tiau(
result=result,
han_ji_khoo=han_ji_khoo,
piau_im=piau_im,
piau_im_huat=piau_im_huat,
tai_gi_im_piau=tai_gi_im_piau
piau_im_huat=piau_im_huat
)
han_ji_u_piau_im = True

if han_ji_u_piau_im:
sheet.range((row - 1, col)).value = tai_gi_im_piau
sheet.range((row + 1, col)).value = han_ji_piau_im
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}] 【{han_ji_piau_im}】")
else:
msg = f"【{cell_value}】查無此字!"
if tai_gi_im_piau and han_ji_piau_im:
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}] 【{han_ji_piau_im}】")
else:
print(f"({row}, {col_name}) = {msg}")
print(f"({row}, {col_name}) = {msg}")

index += 1

Expand Down
11 changes: 5 additions & 6 deletions p730_Tng_Sing_Bang_Iah.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,12 @@ def tng_uann_piau_im(piau_im, zu_im_huat, siann_bu, un_bu, tiau_ho):
return f"{siann}{un}{tiau_ho}"
return ""

def concat_ruby_tag(wb, piau_im, zu_im_huat, han_ji, tai_gi_im_piau):

def concat_ruby_tag(wb, piau_im, han_ji, tai_gi_im_piau):
"""將漢字、台語音標及台語注音符號,合併成一個 Ruby Tag"""
zu_im_list = split_tai_gi_im_piau(tai_gi_im_piau)
if zu_im_list[0] == "" or zu_im_list[0] == None:
siann_bu = "Ø"
siann_bu = "ø"
else:
siann_bu = zu_im_list[0]

Expand Down Expand Up @@ -263,16 +264,14 @@ def build_web_page(wb, sheet, source_chars, total_length, page_type='含頁頭',
# =========================================================
# 將已注音之漢字加入【漢字注音表】
# =========================================================
# 在 Console 顯示目前處理的漢字,以便使用者可知目前進度
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}]")

ruby_tag = concat_ruby_tag(
wb=wb,
piau_im=piau_im, # 注音法物件
zu_im_huat=piau_im_huat,
han_ji=han_ji,
tai_gi_im_piau=tai_gi_im_piau
)
# 在 Console 顯示目前處理的漢字,以便使用者可知目前進度
print(f"({row}, {col_name}) = {han_ji} [{tai_gi_im_piau}]")

write_buffer += ruby_tag
index += 1
Expand Down
Loading

0 comments on commit 359e855

Please sign in to comment.