Skip to content

Commit

Permalink
完善【人工手動更正漢字標音作業流程】
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Jan 13, 2025
1 parent 72011db commit c5dd6c2
Show file tree
Hide file tree
Showing 7 changed files with 394 additions and 114 deletions.
7 changes: 7 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@
"program": "a705_依漢字注音工作表內容更新待注音漢字.py",
"console": "integratedTerminal",
},
{
"name": "a706_手動更正漢字標音",
"type": "debugpy",
"request": "launch",
"program": "a706_手動更正漢字標音.py",
"console": "integratedTerminal",
},
{
"name": "a710_批次式漢字標音及網頁製作",
"type": "debugpy",
Expand Down
247 changes: 241 additions & 6 deletions a702_查找及填入漢字標音.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# =========================================================================
import logging
import os
import sqlite3
import sys
from pathlib import Path

Expand All @@ -11,8 +12,21 @@
from dotenv import load_dotenv

# 載入自訂模組
from mod_file_access import get_han_ji_khoo, get_sound_type, save_as_new_file
from p702_Ca_Han_Ji_Thak_Im import ca_han_ji_thak_im
from mod_excel_access import (
get_han_ji_khoo,
get_tai_gi_by_han_ji,
get_value_by_name,
maintain_han_ji_koo,
)
from mod_file_access import get_sound_type, load_module_function, save_as_new_file
from mod_標音 import PiauIm # 漢字標音物件
from mod_標音 import hong_im_tng_tai_gi_im_piau # 方音符號轉台語音標
from mod_標音 import is_punctuation # 是否為標點符號
from mod_標音 import siann_un_tiau_tng_piau_im # 声、韻、調轉台語音標
from mod_標音 import split_hong_im_hu_ho # 分解漢字標音
from mod_標音 import split_tai_gi_im_piau # 分解台語音標
from mod_標音 import tlpa_tng_han_ji_piau_im  # 台語音標轉漢字標音
from p740_Phua_Im_Ji import PhuaImJi # 破音字物件

# =========================================================================
# 載入環境變數
Expand Down Expand Up @@ -49,19 +63,240 @@ def logging_process_step(msg):
# =========================================================================
# 作業程序
# =========================================================================
def ca_ji_kiat_ko_tng_piau_im(result, han_ji_khoo: str, piau_im: PiauIm, piau_im_huat: str):
    """Turn a dictionary lookup result into (Tai-gi im-piau, han-ji piau-im).

    Only the first entry of ``result`` is used.  It is split into initial
    (siann_bu), final (un_bu) and tone (tiau_ho), which are then joined into
    the Tai-gi im-piau and converted to the requested annotation method.

    Args:
        result: Lookup result; ``result[0]`` is indexed by key.  For the
            "河洛話" dictionary the keys '聲母', '韻母' and '聲調' are read;
            for any other dictionary the key '標音' is read and split with
            ``split_tai_gi_im_piau``.
        han_ji_khoo: Name of the character dictionary the result came from.
        piau_im: PiauIm object handed through to ``siann_un_tiau_tng_piau_im``.
        piau_im_huat: Name of the annotation method to produce.

    Returns:
        A tuple ``(tai_gi_im_piau, han_ji_piau_im)``.
    """
    entry = result[0]

    if han_ji_khoo == "河洛話":
        # -----------------------------------------------------------------
        # Colloquial reading: annotate from the Ho-lok-ue character database,
        # which already stores initial / final / tone separately.
        # -----------------------------------------------------------------
        siann_bu = entry['聲母']
        if siann_bu is None:
            # A missing initial would make the join below raise TypeError.
            siann_bu = ""
        un_bu = entry['韻母']
        tiau_ho = entry['聲調']
        if tiau_ho == "6":
            # Tone 6 is rewritten as tone 7.
            tiau_ho = "7"
    else:
        # -----------------------------------------------------------------
        # Literary reading: annotate from the Kong-un character database,
        # whose '標音' value has to be split first.
        # -----------------------------------------------------------------
        siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(entry['標音'])
        if siann_bu is None or siann_bu == "":
            # An empty initial is written as "ø" in this branch.
            siann_bu = "ø"

    # Join initial, final and tone into the Tai-gi im-piau.
    tai_gi_im_piau = ''.join([siann_bu, un_bu, tiau_ho])

    # For the "十五音" / "雅俗通" methods an empty initial must be passed as "ø".
    # This happens after the join, so tai_gi_im_piau is not affected.
    if piau_im_huat in ("十五音", "雅俗通") and siann_bu == "":
        siann_bu = "ø"

    han_ji_piau_im = siann_un_tiau_tng_piau_im(
        piau_im,
        piau_im_huat,
        siann_bu,
        un_bu,
        tiau_ho
    )
    return tai_gi_im_piau, han_ji_piau_im


def _convert_manual_input(manual_input, piau_im, piau_im_huat, cursor):
    """Convert a manually entered annotation to (tai_gi_im_piau, han_ji_piau_im).

    Three input forms are recognised:

    * ``〔...〕``: the bracketed text is a Tai-gi im-piau; the han-ji piau-im is
      produced with ``tlpa_tng_han_ji_piau_im``.
    * ``【...】``: the bracketed text is used as the han-ji piau-im as typed; it
      is split with ``split_hong_im_hu_ho`` and the Tai-gi im-piau is looked up
      with ``hong_im_tng_tai_gi_im_piau``.
    * anything else: the whole value is taken as a Tai-gi im-piau.
    """
    if '〔' in manual_input and '〕' in manual_input:
        tai_gi_im_piau = manual_input.split('〔')[1].split('〕')[0]
        han_ji_piau_im = tlpa_tng_han_ji_piau_im(
            piau_im=piau_im,
            piau_im_huat=piau_im_huat,
            tai_gi_im_piau=tai_gi_im_piau
        )
    elif '【' in manual_input and '】' in manual_input:
        han_ji_piau_im = manual_input.split('【')[1].split('】')[0]
        siann, un, tiau = split_hong_im_hu_ho(han_ji_piau_im)
        tai_gi_im_piau = hong_im_tng_tai_gi_im_piau(
            siann=siann,
            un=un,
            tiau=tiau,
            cursor=cursor,
        )['台語音標']
    else:
        tai_gi_im_piau = manual_input
        han_ji_piau_im = tlpa_tng_han_ji_piau_im(
            piau_im=piau_im,
            piau_im_huat=piau_im_huat,
            tai_gi_im_piau=tai_gi_im_piau
        )
    return tai_gi_im_piau, han_ji_piau_im


def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', ue_im_lui_piat="白話音", han_ji_khoo="河洛話", db_name='Ho_Lok_Ue.db', module_name='mod_河洛話', function_name='han_ji_ca_piau_im'):
    """Look up the reading of every han-ji on the sheet and fill in its annotations.

    The sheet is walked in groups of four rows starting at row 5.  In each
    group the han-ji sits on ``row``; ``row - 2`` holds optional manual input,
    ``row - 1`` receives the Tai-gi im-piau and ``row + 1`` receives the
    han-ji piau-im.  Columns start at column 4.

    For each han-ji the reading is taken, in order of priority, from:
    1. the manual-input cell above it (also recorded in the '人工標音字庫' sheet),
    2. the '人工標音字庫' sheet (the cell is then coloured red on yellow),
    3. the database lookup function loaded from ``module_name``.

    Scanning stops at a 'φ' cell or at the second empty cell within one line;
    a '\\n' cell ends the current line.

    Args:
        wb: Workbook to process (xlwings-style API: ``sheets``, ``names``, ``save``).
        sheet_name: Name of the worksheet holding the han-ji.
        cell: Not used by this function; kept for interface compatibility.
        ue_im_lui_piat: Reading category handed to the lookup function.
        han_ji_khoo: Ignored on entry; the value is re-read from the workbook
            name '漢字庫'.
        db_name: SQLite database file to open.
        module_name: Module that provides the lookup function.
        function_name: Name of the lookup function inside ``module_name``.

    Returns:
        ``EXIT_CODE_SUCCESS`` once the workbook has been saved.
    """
    # Resolve the lookup function dynamically.
    han_ji_ca_piau_im = load_module_function(module_name, function_name)

    # Open the database; the finally-clause guarantees the connection is closed
    # even when a workbook access or a conversion raises.
    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.cursor()

        # Build the PiauIm object used for all annotation conversions.
        han_ji_khoo = get_value_by_name(wb=wb, name='漢字庫')
        piau_im = PiauIm(han_ji_khoo=han_ji_khoo)
        piau_im_huat = get_value_by_name(wb=wb, name='標音方法')

        # Bookkeeping sheets: automatically found readings and manual readings.
        han_ji_koo_sheet = get_han_ji_khoo(wb=wb, sheet_name='漢字庫')
        jin_kang_piau_im = get_han_ji_khoo(wb=wb, sheet_name='人工標音字庫')

        # Make the annotation worksheet the active one.
        sheet = wb.sheets[sheet_name]
        sheet.activate()

        # Row range: the han-ji rows are 5, 9, 13, ... (one every four rows).
        TOTAL_LINES = int(wb.names['每頁總列數'].refers_to_range.value)
        ROWS_PER_LINE = 4
        start_row = 5
        end_row = start_row + (TOTAL_LINES * ROWS_PER_LINE)

        # Column range: starts at column 4 (D); width comes from the workbook.
        CHARS_PER_ROW = int(wb.names['每列總字數'].refers_to_range.value)
        start_col = 4
        end_col = start_col + CHARS_PER_ROW

        EOF = False
        for row in range(start_row, end_row, ROWS_PER_LINE):
            # Stop once an end-of-text condition was seen on the previous line.
            if EOF:
                break

            # Move the selection to the start of the line; reset the
            # per-line empty-cell counter.
            empty_cells = 0
            sheet.range((row, 1)).select()

            for col in range(start_col, end_col):
                han_ji_u_piau_im = False
                msg = ""
                cell_value = sheet.range((row, col)).value

                if cell_value == 'φ':
                    EOF = True
                    msg = "【文字終結】"
                elif cell_value == '\n':
                    msg = "【換行】"
                elif cell_value is None:
                    # The first empty cell on a line is tolerated; the second
                    # one ends the whole scan.
                    # NOTE(review): source indentation was lost; the message is
                    # assumed to apply to every empty cell, confirm.
                    if empty_cells == 0:
                        empty_cells += 1
                    elif empty_cells == 1:
                        EOF = True
                    msg = "【缺空】"    # cell holds nothing at all (not a blank character)
                elif is_punctuation(cell_value):
                    # Punctuation is only echoed, never annotated.
                    msg = f"{cell_value}"
                else:
                    han_ji = cell_value

                    # The cell two rows above decides how the reading is obtained.
                    manual_input = sheet.range((row - 2, col)).value

                    if manual_input:
                        tai_gi_im_piau, han_ji_piau_im = _convert_manual_input(
                            manual_input, piau_im, piau_im_huat, cursor
                        )
                        han_ji_u_piau_im = True

                        # Remember the manual reading for later occurrences.
                        maintain_han_ji_koo(sheet=jin_kang_piau_im,
                                            han_ji=han_ji,
                                            tai_gi=tai_gi_im_piau,
                                            show_msg=False)
                    else:
                        # First consult the manual-reading sheet.
                        tai_gi_im_piau = get_tai_gi_by_han_ji(jin_kang_piau_im, han_ji)
                        if tai_gi_im_piau:
                            siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(tai_gi_im_piau)
                            tai_gi_im_piau = siann_bu + un_bu + tiau_ho
                            han_ji_piau_im = siann_un_tiau_tng_piau_im(
                                piau_im,
                                piau_im_huat,
                                siann_bu,
                                un_bu,
                                tiau_ho
                            )
                            han_ji_u_piau_im = True
                            # Highlight cells whose reading came from the manual sheet.
                            sheet.range((row, col)).font.color = (255, 0, 0)    # red text
                            sheet.range((row, col)).color = (255, 255, 0)       # yellow fill
                            print(f"漢字:【{han_ji}】之注音【{tai_gi_im_piau}】取自【人工注音字典】。")
                        else:
                            # Fall back to the database lookup.
                            result = han_ji_ca_piau_im(cursor=cursor,
                                                       han_ji=han_ji,
                                                       ue_im_lui_piat=ue_im_lui_piat)
                            if not result:
                                msg = f"【{han_ji}】查無此字!"
                            else:
                                tai_gi_im_piau, han_ji_piau_im = ca_ji_kiat_ko_tng_piau_im(
                                    result=result,
                                    han_ji_khoo=han_ji_khoo,
                                    piau_im=piau_im,
                                    piau_im_huat=piau_im_huat
                                )
                                han_ji_u_piau_im = True

                    if han_ji_u_piau_im:
                        # Record the reading and write both annotations to the sheet.
                        maintain_han_ji_koo(sheet=han_ji_koo_sheet,
                                            han_ji=han_ji,
                                            tai_gi=tai_gi_im_piau,
                                            show_msg=False)
                        sheet.range((row - 1, col)).value = tai_gi_im_piau
                        sheet.range((row + 1, col)).value = han_ji_piau_im
                        msg = f"{han_ji}: [{tai_gi_im_piau}] /【{han_ji_piau_im}】"

                # Progress report for this cell.
                col_name = xw.utils.col_name(col)
                print(f"({row}, {col_name}) = {msg}")

                # A line break or end-of-text ends the current line.
                if msg == "【換行】" or EOF:
                    break

            # Blank line between processed lines.
            print("\n")
    finally:
        conn.close()

    # Persist the workbook only after a complete, error-free pass.
    wb.save()
    print("已完成【台語音標】和【漢字標音】標注工作。")
    return EXIT_CODE_SUCCESS


def process(wb):
# ------------------------------------------------------------------------------
# 指定【作業中工作表】
# 指定【作業工作表】為【漢字注音】工作表
# ------------------------------------------------------------------------------
sheet = wb.sheets["漢字注音"] # 選擇工作表
sheet.activate() # 將「漢字注音」工作表設為作用中工作表
sheet.range("A1").select() # 將 A1 儲存格設為作用儲存格

# ------------------------------------------------------------------------------
# 為漢字查找讀音,漢字上方填:【台語音標】;漢字下方填使用者指定之【漢字標音】
# ------------------------------------------------------------------------------
type = get_sound_type(wb) # 取得【語音類型】,判別使用【白話音】或【文讀音】何者。
han_ji_khoo = get_han_ji_khoo(wb)
type = get_value_by_name(wb, '語音類型') # 取得【語音類型】,判別使用【白話音】或【文讀音】何者。
han_ji_khoo = get_value_by_name(wb, '漢字庫')
if han_ji_khoo == "河洛話" and type == "白話音":
ca_han_ji_thak_im(
wb=wb,
Expand Down
Loading

0 comments on commit c5dd6c2

Please sign in to comment.