Skip to content

Commit

Permalink
修正【雅俗通】標音方法的錯誤
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Jan 9, 2025
1 parent bdf8883 commit 69b66d4
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 107 deletions.
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
"version": "0.2.0",
"configurations": [
{
"name": "p740_Phua_Im_Ji",
"name": "p750_將儲存格漢字另存成文字檔",
"type": "debugpy",
"request": "launch",
"program": "p740_Phua_Im_Ji.py",
"program": "p750_將儲存格漢字另存成文字檔.py",
"console": "integratedTerminal",
},
{
Expand Down
Binary file not shown.
211 changes: 107 additions & 104 deletions p702_Ca_Han_Ji_Thak_Im.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ def za_ji_kiat_ko_cut_piau_im(result, han_ji_khoo, piau_im, piau_im_huat):

def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話音", han_ji_khoo="河洛話", db_name='Ho_Lok_Ue.db', module_name='mod_河洛話', function_name='han_ji_ca_piau_im'):
"""查漢字讀音:依【漢字】查找【台語音標】,並依指定之【標音方法】輸出【漢字標音】"""
# 動態載入查找函數
han_ji_ca_piau_im = load_module_function(module_name, function_name)

# 連接指定資料庫
conn = sqlite3.connect(db_name)
cursor = conn.cursor()

# 初始化 PiauIm 類別,産生標音物件
piau_im = PiauIm(han_ji_khoo=han_ji_khoo)
piau_im_huat = wb.names['標音方法'].refers_to_range.value
Expand All @@ -69,134 +76,130 @@ def ca_han_ji_thak_im(wb, sheet_name='漢字注音', cell='V3', hue_im="白話
sheet.activate()
sheet.range('A1').select()

# 取得 V3 儲存格的字串
v3_value = sheet.range(cell).value

# 取得工作表能處理最多列數: 20 列
TOTAL_ROWS = int(wb.names['每頁總列數'].refers_to_range.value)
TOTAL_LINES = int(wb.names['每頁總列數'].refers_to_range.value)
row = 5

# 每列最多處理 15 字元
CHARS_PER_ROW = int(wb.names['每列總字數'].refers_to_range.value)
# 設定起始及結束的欄位 (【D欄=4】到【R欄=18】)
start = 4
end = start + CHARS_PER_ROW

total_length = len(v3_value)

# 動態載入查找函數
han_ji_ca_piau_im = load_module_function(module_name, function_name)

# 連接指定資料庫
conn = sqlite3.connect(db_name)
cursor = conn.cursor()

# 逐字處理字串,並填入對應的儲存格
row = 5
while row < TOTAL_ROWS:
EOF = False
line = 1
while line < TOTAL_LINES and not EOF:
# 設定【作用儲存格】為列首
sheet.range((row, 1)).select()
for col in range(start, end):
msg = ""
col_name = xw.utils.col_name(col)

# 取得當前字元
han_ji_u_piau_im = False
cell_value = sheet.range((row, col)).value

if cell_value == '\n':
if cell_value == 'φ':
EOF = True
break

# 若不為【標點符號】,則以【漢字】處理
if not is_punctuation(cell_value):
# 查找漢字讀音
han_ji = cell_value
han_ji_u_piau_im = False

# 依據【人工標音】欄是否有輸入,決定【漢字標音】之處理方式
manual_input = sheet.range((row-2, col)).value

if manual_input: # 若有人工輸入之處理作業
if '〔' in manual_input and '〕' in manual_input:
# 將人工輸入的〔台語音標〕轉換成【方音符號】
im_piau = manual_input.split('〔')[1].split('〕')[0]
siann, un, tiau = split_tai_gi_im_piau(im_piau)
tai_gi_im_piau = ''.join([siann, un, tiau])
# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tlpa_tng_han_ji_piau_im(
piau_im=piau_im,
piau_im_huat=piau_im_huat,
tai_gi_im_piau=tai_gi_im_piau
)
han_ji_u_piau_im = True
elif '【' in manual_input and '】' in manual_input:
# 將人工輸入的【方音符號】轉換成【台語音標】
han_ji_piau_im = manual_input.split('【')[1].split('】')[0]
siann, un, tiau = split_hong_im_hu_ho(han_ji_piau_im)
# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
tai_gi_im_piau = hong_im_tng_tai_gi_im_piau(
siann=siann,
un=un,
tiau=tiau,
cursor=cursor,
)['台語音標']
han_ji_u_piau_im = True
else:
# 將人工輸入的【台語音標】,解構為【聲母】、【韻母】、【聲調】
tai_gi_im_piau = manual_input
siann, un, tiau = split_tai_gi_im_piau(tai_gi_im_piau)
# 依指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann,
un,
tiau
)
han_ji_u_piau_im = True

# 將人工輸入的【台語音標】置入【破音字庫】Dict
phua_im_ji.ka_phua_im_ji(han_ji, tai_gi_im_piau)
else: # 無人工輸入,則自【漢字庫】查找作業
# 查找【破音字庫】,確認是否有此漢字
found = phua_im_ji.ca_phua_im_ji(han_ji)
# 若【破音字庫】有此漢字
if found:
siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(found)
tai_gi_im_piau = siann_bu + un_bu + tiau_ho
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann_bu,
un_bu,
tiau_ho
)
han_ji_u_piau_im = True
sheet.range((row, col)).font.color = (255, 0, 0) # 將文字顏色設為【紅色】
sheet.range((row, col)).color = (255, 255, 0) # 將底色設為【黄色】
print(f"漢字:【{han_ji}】之注音【{tai_gi_im_piau}】取自【人工注音字典】。")
# 若【破音字庫】無此漢字,則在資料庫中查找
else:
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, hue_im=hue_im)
if not result:
msg = f"【{han_ji}】查無此字!"
else:
# 依【漢字庫】查找結果,輸出【台語音標】和【漢字標音】
tai_gi_im_piau, han_ji_piau_im = za_ji_kiat_ko_cut_piau_im(
result=result,
han_ji_khoo=han_ji_khoo,
elif cell_value == '\n':
break
else:
# 若不為【標點符號】,則以【漢字】處理
if is_punctuation(cell_value):
msg = f"{cell_value}"
else:
# 查找漢字讀音
han_ji = cell_value

# 依據【人工標音】欄是否有輸入,決定【漢字標音】之處理方式
manual_input = sheet.range((row-2, col)).value

if manual_input: # 若有人工輸入之處理作業
if '〔' in manual_input and '〕' in manual_input:
# 將人工輸入的〔台語音標〕轉換成【方音符號】
im_piau = manual_input.split('〔')[1].split('〕')[0]
siann, un, tiau = split_tai_gi_im_piau(im_piau)
tai_gi_im_piau = ''.join([siann, un, tiau])
# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tlpa_tng_han_ji_piau_im(
piau_im=piau_im,
piau_im_huat=piau_im_huat
piau_im_huat=piau_im_huat,
tai_gi_im_piau=tai_gi_im_piau
)
han_ji_u_piau_im = True
elif '【' in manual_input and '】' in manual_input:
# 將人工輸入的【方音符號】轉換成【台語音標】
han_ji_piau_im = manual_input.split('【')[1].split('】')[0]
siann, un, tiau = split_hong_im_hu_ho(han_ji_piau_im)
# 依使用者指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
tai_gi_im_piau = hong_im_tng_tai_gi_im_piau(
siann=siann,
un=un,
tiau=tiau,
cursor=cursor,
)['台語音標']
han_ji_u_piau_im = True
else:
# 將人工輸入的【台語音標】,解構為【聲母】、【韻母】、【聲調】
tai_gi_im_piau = manual_input
siann, un, tiau = split_tai_gi_im_piau(tai_gi_im_piau)
# 依指定之【標音方法】,將【台語音標】轉換成其所需之【漢字標音】
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann,
un,
tiau
)
han_ji_u_piau_im = True

if han_ji_u_piau_im:
sheet.range((row - 1, col)).value = tai_gi_im_piau
sheet.range((row + 1, col)).value = han_ji_piau_im
msg = f"{han_ji}: [{tai_gi_im_piau}] /【{han_ji_piau_im}】"

col_name = xw.utils.col_name(col)
print(f"({row}, {col_name}) = {msg}")
# 將人工輸入的【台語音標】置入【破音字庫】Dict
phua_im_ji.ka_phua_im_ji(han_ji, tai_gi_im_piau)
else: # 無人工輸入,則自【漢字庫】查找作業
# 查找【破音字庫】,確認是否有此漢字
found = phua_im_ji.ca_phua_im_ji(han_ji)
# 若【破音字庫】有此漢字
if found:
siann_bu, un_bu, tiau_ho = split_tai_gi_im_piau(found)
tai_gi_im_piau = siann_bu + un_bu + tiau_ho
han_ji_piau_im = tng_uann_han_ji_piau_im(
piau_im,
piau_im_huat,
siann_bu,
un_bu,
tiau_ho
)
han_ji_u_piau_im = True
sheet.range((row, col)).font.color = (255, 0, 0) # 將文字顏色設為【紅色】
sheet.range((row, col)).color = (255, 255, 0) # 將底色設為【黄色】
print(f"漢字:【{han_ji}】之注音【{tai_gi_im_piau}】取自【人工注音字典】。")
# 若【破音字庫】無此漢字,則在資料庫中查找
else:
result = han_ji_ca_piau_im(cursor=cursor, han_ji=han_ji, hue_im=hue_im)
if not result:
msg = f"【{han_ji}】查無此字!"
else:
# 依【漢字庫】查找結果,輸出【台語音標】和【漢字標音】
tai_gi_im_piau, han_ji_piau_im = za_ji_kiat_ko_cut_piau_im(
result=result,
han_ji_khoo=han_ji_khoo,
piau_im=piau_im,
piau_im_huat=piau_im_huat
)
han_ji_u_piau_im = True

if han_ji_u_piau_im:
sheet.range((row - 1, col)).value = tai_gi_im_piau
sheet.range((row + 1, col)).value = han_ji_piau_im
msg = f"{han_ji}: [{tai_gi_im_piau}] /【{han_ji_piau_im}】"

print(f"({row}, {col_name}) = {msg}")

# 每處理 15 個字元後,換到下一行
row += 4
print("\n")
row += 4

print("已完成【台語音標】和【方音符號】標注工作。")

Expand Down
1 change: 1 addition & 0 deletions p710_thiam_han_ji.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def fill_hanji_in_cells(wb, sheet_name='漢字注音', cell='V3'):
print("\n")
row += 4

sheet.range((row, start)).value = "φ"
# 保存 Excel 檔案
wb.save()

Expand Down
110 changes: 110 additions & 0 deletions p750_將儲存格漢字另存成文字檔.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import sys

import xlwings as xw


def export_han_ji_to_txt(wb, sheet_name='漢字注音', output_file='tmp.txt'):
    """Export the Han characters laid out on a worksheet to a UTF-8 text file.

    Character rows start at worksheet row 5 and repeat every 4 rows. Each
    character row is read left to right starting at column 4 (D) for as many
    columns as the workbook's `每列總字數` named range specifies.

    Cell values are interpreted as follows:
      * 'φ'   -- end-of-text sentinel: stop reading altogether.
      * '\\n'  -- line break: emit a newline and move on to the next row.
      * None  -- empty cell: skipped (nothing is emitted).
      * other -- appended to the output as text.

    Args:
        wb: Workbook object providing ``sheets[...]`` and ``names[...]``
            (used here the way an xlwings ``Book`` is used).
        sheet_name: Name of the worksheet holding the characters.
        output_file: Path of the text file to write; it is overwritten.
    """
    sheet = wb.sheets[sheet_name]
    sheet.activate()

    # Page geometry is configured through named ranges in the workbook.
    total_lines = int(wb.names['每頁總列數'].refers_to_range.value)
    chars_per_row = int(wb.names['每列總字數'].refers_to_range.value)

    # Columns to scan: from D (=4) up to, but not including, last_col.
    first_col = 4
    last_col = first_col + chars_per_row

    # Collect fragments and join once at the end instead of growing a string.
    pieces = []
    row = 5
    line = 1
    # NOTE(review): with `line` starting at 1 this visits at most
    # total_lines - 1 character rows. Kept as in the original -- confirm
    # whether the final row of a completely full page should be included.
    while line < total_lines:
        # Move the active cell to the start of the row being processed.
        sheet.range((row, 1)).select()

        reached_eof = False
        for col in range(first_col, last_col):
            cell_value = sheet.range((row, col)).value

            if cell_value is None:
                # Empty cell: concatenating None would raise TypeError.
                continue
            if cell_value == 'φ':
                reached_eof = True
                break
            if cell_value == '\n':
                pieces.append('\n')
                break
            # str() guards against non-text cell values (e.g. numbers).
            pieces.append(str(cell_value))

        # The 'φ' sentinel marks the end of the text.
        if reached_eof:
            print(f"第 {row} 列為檔案結尾處,結束處理作業。")
            break

        # Character rows are 4 worksheet rows apart.
        row += 4
        line += 1

    # Write all collected characters to the text file.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(pieces))

    print(f"已成功將漢字輸出至檔案:{output_file}")


def dump_txt_file(file_path):
    """Print the contents of a UTF-8 text file to the screen.

    A header and a separator line are printed first, followed by the file
    contents. Problems opening or reading the file are reported on stdout
    instead of being raised.

    Args:
        file_path: Path of the text file to display.
    """
    print("\n【文字檔內容】:")
    print("========================================\n")
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"無法找到檔案:{file_path}")
    except OSError as err:
        # e.g. the path is a directory or the file is not readable; report
        # it rather than aborting the dump with a traceback.
        print(f"無法讀取檔案:{file_path}({err})")
    else:
        print(content)


# Script entry point (example usage)
if __name__ == "__main__":
    # Work on the workbook that is currently active in a running Excel
    # instance instead of opening a file by name.
    try:
        wb = xw.apps.active.books.active
    except Exception as err:
        print(f"發生錯誤: {err}")
        print("無法找到作用中的 Excel 工作簿")
        sys.exit(2)

    # Guard against a falsy result even when no exception was raised.
    if not wb:
        print("無法執行,可能原因:(1) 未指定輸入檔案;(2) 未找到作用中的 Excel 工作簿")
        sys.exit(2)

    # Name of the plain-text output file.
    output_file = 'tmp.txt'

    # Export the Han characters to the text file, then echo the file to the
    # screen. The workbook is left open afterwards.
    export_han_ji_to_txt(wb, output_file=output_file)
    dump_txt_file(output_file)
14 changes: 13 additions & 1 deletion tmp.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
°¶°¹±
觀自在菩薩,行深般若波羅蜜多時,照見五蘊皆空,度一切苦厄。

舍利子!色不異空,空不異色,色即是空,空即是色,受想行識,亦復如是。

舍利子!是諸法空相,不生不滅,不垢不淨,不增不減。是故空中無色,無受想行識。無眼耳鼻舌身意;無色聲香味觸法。

無眼界,乃至無意識界。無無明,亦無無明盡;乃至無老死,亦無老死盡。

無苦集滅道。無智,亦無得。以無所得故!菩提薩埵,依般若波羅蜜多故,心無罣礙,無罣礙故,無有恐怖,遠離顛倒、夢想,究竟湼槃。

三世諸佛,依般若波羅蜜多故,得阿耨多羅三藐三菩提!

故知般若波羅蜜多,是大神咒,是大明咒,是無上咒,是無等等咒。能除一切苦!真實不虛!

故說般若波羅蜜多咒,即說咒曰:揭諦,揭諦,波羅揭諦,波羅僧揭諦,菩提薩婆訶!

0 comments on commit 69b66d4

Please sign in to comment.