Skip to content

Commit

Permalink
新增 705 功能,更新【待注音漢字】及【漢字注音】表中的標音儲存格內容
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Jan 11, 2025
1 parent 3583ad5 commit 3e5b8d9
Show file tree
Hide file tree
Showing 12 changed files with 543 additions and 18 deletions.
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@
"console": "integratedTerminal",
},
{
"name": "a705_輸出己標音漢字",
"name": "a705_依漢字注音工作表內容更新待注音漢字",
"type": "debugpy",
"request": "launch",
"program": "a705_輸出己標音漢字.py",
"program": "a705_依漢字注音工作表內容更新待注音漢字.py",
"console": "integratedTerminal",
},
{
Expand Down
Binary file modified Ho_Lok_Ue.db
Binary file not shown.
7 changes: 3 additions & 4 deletions a701_作業中活頁檔填入漢字.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dotenv import load_dotenv

# 載入自訂模組
from mod_file_access import get_han_ji_khoo, get_sound_type, save_as_new_file
from mod_file_access import save_as_new_file
from p701_Clear_Cells import clear_han_ji_kap_piau_im
from p702_Ca_Han_Ji_Thak_Im import ca_han_ji_thak_im
from p709_reset_han_ji_cells import reset_han_ji_cells
Expand Down Expand Up @@ -86,13 +86,12 @@ def process(wb):
# ---------------------------------------------------------------------
# 為漢字查找標音
# ---------------------------------------------------------------------
type = get_sound_type(wb)
han_ji_khoo = get_han_ji_khoo(wb)
ue_im_lui_piat = wb.names['語音類型'].refers_to_range.value
han_ji_khoo = wb.names['漢字庫'].refers_to_range.value

if han_ji_khoo in ["河洛話", "廣韻"]:
db_name = DB_HO_LOK_UE if han_ji_khoo == "河洛話" else DB_KONG_UN
module_name = 'mod_河洛話' if han_ji_khoo == "河洛話" else 'mod_廣韻'
ue_im_lui_piat = type if han_ji_khoo == "白話音" else "文讀音"

# 查找漢字標音
logging.info(f"開始【漢字標音作業】 - {han_ji_khoo}: {type}")
Expand Down
4 changes: 2 additions & 2 deletions a702_查找及填入漢字標音.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def process(wb):
wb=wb,
sheet_name="漢字注音",
cell="V3",
ue_im_lui_piat="白話音",
ue_im_lui_piat=type,
han_ji_khoo="河洛話",
db_name="Ho_Lok_Ue.db",
module_name="mod_河洛話",
Expand All @@ -78,7 +78,7 @@ def process(wb):
wb=wb,
sheet_name="漢字注音",
cell="V3",
ue_im_lui_piat="文讀音",
ue_im_lui_piat=type,
han_ji_khoo="河洛話",
db_name="Ho_Lok_Ue.db",
module_name="mod_河洛話",
Expand Down
220 changes: 220 additions & 0 deletions a705_依漢字注音工作表內容更新待注音漢字.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# =========================================================================
# 載入程式所需套件/模組/函式庫
# =========================================================================
import logging
import os
import sys
from pathlib import Path

# 載入第三方套件
import xlwings as xw
from dotenv import load_dotenv

from a701_作業中活頁檔填入漢字 import process as fill_hanji_in_cells

# 載入自訂模組
from mod_file_access import save_as_new_file

# =========================================================================
# Load environment variables
# =========================================================================
load_dotenv()

# Database file names are read from the environment; the second argument of
# each os.getenv call is the fallback used when the variable is not set.
DB_HO_LOK_UE = os.getenv('DB_HO_LOK_UE', 'Ho_Lok_Ue.db')
DB_KONG_UN = os.getenv('DB_KONG_UN', 'Kong_Un.db')

# =========================================================================
# Configure logging
# =========================================================================
# Records at INFO level and above are written to process_log.txt using a
# "timestamp - level - message" layout.
# NOTE(review): no encoding is passed for the log file, so the platform
# default is used even though the messages logged by this script contain
# Chinese text -- confirm this is acceptable on the target machines.
logging.basicConfig(
filename='process_log.txt',
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)

def logging_process_step(msg):
    """Report one processing step on the console and in the log.

    The message is printed to standard output first and then recorded at
    INFO level through the module-level logging API.

    Args:
        msg: The text to show and to log.
    """
    print(msg)
    logging.log(logging.INFO, msg)

# =========================================================================
# Constants
# =========================================================================
# Process exit codes returned by main() and passed to sys.exit()
EXIT_CODE_SUCCESS = 0 # success
EXIT_CODE_NO_FILE = 1 # file (workbook) could not be found
EXIT_CODE_INVALID_INPUT = 2 # invalid input
EXIT_CODE_PROCESS_FAILURE = 3 # processing failed
EXIT_CODE_UNKNOWN_ERROR = 99 # unknown error


# =========================================================================
# Local Function
# =========================================================================
def dump_txt_file(file_path):
    """Print the contents of a UTF-8 text file to the console.

    A two-line banner is printed first. If the file does not exist, a
    "file not found" message naming the path is printed instead of the
    contents; other errors are not handled here.

    Args:
        file_path: Path of the text file to display.
    """
    print("\n【文字檔內容】:")
    print("========================================\n")
    try:
        with open(file_path, 'r', encoding='utf-8') as text_file:
            text = text_file.read()
    except FileNotFoundError:
        print(f"無法找到檔案:{file_path}")
    else:
        print(text)

# =========================================================================
# 本程式主要處理作業程序
# =========================================================================
def _cell_text(value):
    """Return a non-empty cell value as text.

    Strings pass through unchanged. Any other value is converted, so that a
    cell typed as a number does not abort the run with a TypeError when the
    text is assembled.
    """
    if isinstance(value, str):
        return value
    # NOTE(review): xlwings is documented to hand numeric cells back as
    # float; whole numbers are rendered without the trailing ".0" here --
    # confirm this is the wanted text for numeric cells.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _collect_han_ji_text(sheet, total_lines, chars_per_row):
    """Read the character cells of `sheet` and return them as one string.

    Character rows start at row 5 and repeat every 4 rows; characters start
    at column 4 (D). Reading stops at the end marker 'φ', at the second empty
    cell met within one row, or after `total_lines` rows. A cell holding a
    newline adds a line break to the text and ends that row early.
    """
    rows_per_line = 4
    start_row = 5
    start_col = 4
    end_row = start_row + total_lines * rows_per_line
    end_col = start_col + chars_per_row

    parts = []
    reached_end = False

    # The range already yields exactly `total_lines` rows, so no separate
    # line counter (or manual row increment) is needed.
    for row in range(start_row, end_row, rows_per_line):
        empty_cells = 0
        # Move the active cell to the head of the row being read.
        sheet.range((row, 1)).select()

        for col in range(start_col, end_col):
            cell_value = sheet.range((row, col)).value
            line_break = False

            if cell_value is None:
                # The first empty cell in a row is tolerated; the second
                # one is treated as the end of the text.
                if empty_cells == 0:
                    empty_cells += 1
                else:
                    reached_end = True
                msg = "【缺空】"
            elif cell_value == 'φ':
                reached_end = True
                msg = "【文字終結】"
            elif cell_value == '\n':
                parts.append('\n')
                line_break = True
                msg = "【換行】"
            else:
                # A Han character or a punctuation mark.
                msg = _cell_text(cell_value)
                parts.append(msg)

            # Show progress using the column letter rather than its index.
            print(f"({row}, {xw.utils.col_name(col)}) = {msg}")

            # Decide on explicit flags, not on the display text, so a cell
            # whose content happens to equal a progress label cannot end
            # the row by accident.
            if line_break or reached_end:
                break

        print("\n")
        if reached_end:
            break

    return "".join(parts)


def process(wb):
    """Rebuild the pending-text cell from the '漢字注音' sheet, then refresh the sheet.

    The characters laid out on the '漢字注音' sheet are read back into one
    string, that string is written to cell V3 of the same sheet, and the
    workbook is then handed to `fill_hanji_in_cells` (the `process` function
    imported from a701), which according to the step message logged here
    updates the phonetic cells of the sheet.

    Args:
        wb: The workbook to work on (main() passes the active xlwings book).
            It must contain a sheet named '漢字注音' and the defined names
            '每頁總列數' (lines per page) and '每列總字數' (characters per line).

    Returns:
        EXIT_CODE_SUCCESS. Failures surface as exceptions raised by the
        workbook calls or by `fill_hanji_in_cells`.
    """
    sheet = wb.sheets['漢字注音']
    sheet.activate()

    # Layout parameters are taken from the workbook's defined names.
    total_lines = int(wb.names['每頁總列數'].refers_to_range.value)
    chars_per_row = int(wb.names['每列總字數'].refers_to_range.value)

    logging_process_step("開始【處理作業】...")
    han_ji_text = _collect_han_ji_text(sheet, total_lines, chars_per_row)

    # Write the collected text back to the pending-text cell.
    sheet.range('V3').value = han_ji_text
    logging_process_step("已更新【待注音漢字】儲存格!")

    # Re-run the a701 fill step on the workbook.
    fill_hanji_in_cells(wb)
    logging_process_step("已更新【漢字注音】工作表中的【台語音標】與【漢字標音】儲存格!")

    logging_process_step("完成【處理作業】...")
    return EXIT_CODE_SUCCESS


# =========================================================================
# 程式主要作業流程
# =========================================================================
def main():
    """Run the update against the workbook that is currently active in Excel.

    Steps:
      1. Log the directory containing this script as the project root.
      2. Fetch the active workbook of the active Excel application; any
         failure, or a falsy result, ends the run with EXIT_CODE_NO_FILE.
      3. Call process() on the workbook. The workbook is saved afterwards
         whether or not processing succeeded.

    Returns:
        EXIT_CODE_SUCCESS on success, EXIT_CODE_NO_FILE when no workbook is
        available, the code returned by process() when it is not a success,
        or EXIT_CODE_UNKNOWN_ERROR when process() raises.
    """
    # (1) Locate the project root (the directory holding this file).
    project_root = Path(__file__).resolve().parent
    logging_process_step(f"專案根目錄為: {project_root}")

    # (2) Use the workbook that is already open and active in Excel.
    wb = None
    try:
        wb = xw.apps.active.books.active
    except Exception as exc:
        logging_process_step(f"發生錯誤: {exc}")
        logging.exception(f"無法找到作用中的 Excel 工作簿: {exc}")
        return EXIT_CODE_NO_FILE

    if not wb:
        logging_process_step("無法作業,因未無任何 Excel 檔案己開啟。")
        return EXIT_CODE_NO_FILE

    # (3) Run the processing step.
    try:
        status = process(wb)
        if status != EXIT_CODE_SUCCESS:
            logging_process_step("處理作業失敗,過程中出錯!")
            return status
    except Exception as exc:
        print(f"執行過程中發生未知錯誤: {exc}")
        logging.exception(f"執行過程中發生未知錯誤: {exc}")
        return EXIT_CODE_UNKNOWN_ERROR
    finally:
        if wb:
            wb.save()
            # Excel itself is left running; closing it is up to the user.
            logging.info("釋放 Excel 資源,處理完成。")

    logging.info("作業成功完成!")
    return EXIT_CODE_SUCCESS


if __name__ == "__main__":
    # Run the job, report the outcome on the console, and hand the exit
    # code back to the calling shell.
    exit_code = main()
    print(
        "作業正常結束!"
        if exit_code == EXIT_CODE_SUCCESS
        else f"作業異常終止,錯誤代碼為: {exit_code}"
    )
    sys.exit(exit_code)
File renamed without changes.
10 changes: 8 additions & 2 deletions mod_標音.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,10 @@ def NST_piau_im(self, siann_bu, un_bu, tiau_ho):
tiau_ho = replace_superscript_digits(str(tiau_ho))
tiau_ho = 7 if int(tiau_ho) == 6 else int(tiau_ho)

siann = self.Siann_Bu_Dict[siann_bu][piau_im_huat]
if siann_bu == "" or siann_bu == None or siann_bu == "Ø":
siann = "英"
else:
siann = self.Siann_Bu_Dict[siann_bu][piau_im_huat]
un = self.Un_Bu_Dict[un_bu][piau_im_huat]
# tiau = self.TONE_MARKS[piau_im_huat][int(tiau_ho)]
tiau = Tiau_Ho_Remap[tiau_ho]
Expand All @@ -762,7 +765,10 @@ def SNI_piau_im(self, siann_bu, un_bu, tiau_ho):
tiau_ho = replace_superscript_digits(str(tiau_ho))
tiau_ho = 7 if int(tiau_ho) == 6 else int(tiau_ho)

siann = self.Siann_Bu_Dict[siann_bu][piau_im_huat]
if siann_bu == "" or siann_bu == None or siann_bu == "Ø":
siann = "英"
else:
siann = self.Siann_Bu_Dict[siann_bu][piau_im_huat]
un = self.Un_Bu_Dict[un_bu][piau_im_huat]
# tiau = self.TONE_MARKS[piau_im_huat][int(tiau_ho)]
tiau = Tiau_Ho_Remap[tiau_ho]
Expand Down
8 changes: 4 additions & 4 deletions mod_河洛話.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# 白話音:常用度 > 0.40;最常用的讀音其值為 0.60,次常用的讀音其值為 0.50;其餘則使用數值 0.59 ~ 0.41。
# 其 它:常用度 > 0.00;使用數值 0.40 ~ 0.01;使用時機為:(1)方言地方腔;(2) 罕見發音;(3) 尚未查證屬文讀音或白話音 。
# ==========================================================
def han_ji_ca_piau_im(cursor, han_ji, hue_im="文讀音"):
def han_ji_ca_piau_im(cursor, han_ji, ue_im_lui_piat="文讀音"):
"""
根據漢字查詢其台羅音標及相關讀音資訊,並將台羅音標轉換為台語音標。
若資料紀錄在常用度欄位儲存值為空值(NULL),則將其視為 0,因此可排在查詢結果的最後。
Expand All @@ -28,11 +28,11 @@ def han_ji_ca_piau_im(cursor, han_ji, hue_im="文讀音"):
common_reading_condition = "常用度 >= 0.81 AND 常用度 <= 1.0"

# 根據不同讀音類型,添加額外的查詢條件
if hue_im == "文讀音":
if ue_im_lui_piat == "文讀音":
reading_condition = f"({common_reading_condition}) OR (常用度 >= 0.61 AND 常用度 < 0.81)"
elif hue_im == "白話音":
elif ue_im_lui_piat == "白話音":
reading_condition = f"({common_reading_condition}) OR (常用度 > 0.40 AND 常用度 < 0.61)"
elif hue_im == "其它":
elif ue_im_lui_piat == "其它":
reading_condition = "常用度 > 0.00 AND 常用度 <= 0.40"
else:
reading_condition = "1=1" # 查詢所有
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion p702_Ca_Han_Ji_Thak_Im.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', ue_im_lui_piat="
print(f"漢字:【{han_ji}】之注音【{tai_gi_im_piau}】取自【人工注音字典】。")
# 若【破音字庫】無此漢字,則在資料庫中查找
else:
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, hue_im=ue_im_lui_piat)
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, ue_im_lui_piat=ue_im_lui_piat)
if not result:
msg = f"【{han_ji}】查無此字!"
else:
Expand Down
Loading

0 comments on commit 3e5b8d9

Please sign in to comment.