Skip to content

Commit

Permalink
新增 "a500_予我廣韻標音" 功能
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Apr 13, 2024
1 parent 96f937a commit 3a0a68e
Show file tree
Hide file tree
Showing 10 changed files with 339 additions and 3 deletions.
7 changes: 7 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@
"args": [
"德紅"
]
},
{
"name": "a500_予我廣韻標音",
"type": "debugpy",
"request": "launch",
"program": "a500_予我廣韻標音.py",
"console": "integratedTerminal",
}
]
}
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.autopep8"
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.formatting.provider": "none",
"sqltools.connections": [
Expand Down
Binary file modified Kong_Un_V2.db
Binary file not shown.
Binary file removed Kong_Un_V2.db-journal
Binary file not shown.
116 changes: 116 additions & 0 deletions a500_予我廣韻標音.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#================================================================
# 《予我廣韻標音》
# 使用《廣韻》作為漢字標讀音之依據。
#================================================================
import getopt
import os
import sys

import xlwings as xw

import settings
from p000_import_source_data import main_run as san_sing_han_ji_tsu_im_paiau
from p200_Iong_Nga_Siok_Thong_Zu_Im import main_run as hoo_gua_tsu_im
from p300_Kong_Un_Cha_Ji_Tian import Kong_Un_Piau_Im as tsa_ji_tian_tshue_tsu_im


def get_cmd_input(gargv):
    """Parse the command-line options of this script.

    Args:
        gargv: The full argument vector, normally ``sys.argv``. Element 0 is
            used as the program name in the usage text; the remaining
            elements are parsed as options.

    Returns:
        A dict with the keys ``"input"``, ``"user"`` and ``"output"``. An
        option that was not supplied maps to the empty string.

    Raises:
        SystemExit: With status 2 when the options cannot be parsed, or when
            ``-h`` / ``--help`` is requested (after printing the usage text).
    """
    arg_input = ""
    arg_output = ""
    arg_user = ""
    # The parameter is called ``gargv``; reading an undefined ``argv`` here
    # used to raise NameError on every call.
    arg_help = "{0} -i <input> -u <user> -o <output>".format(gargv[0])

    try:
        opts, _args = getopt.getopt(
            gargv[1:], "hi:u:o:", ["help", "input=", "user=", "output="]
        )
    except getopt.GetoptError as e:
        # Unknown option or missing option argument: show why, then usage.
        print(e)
        print(arg_help)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(arg_help)  # print the help message
            sys.exit(2)
        elif opt in ("-i", "--input"):
            arg_input = arg
        elif opt in ("-u", "--user"):
            arg_user = arg
        elif opt in ("-o", "--output"):
            arg_output = arg

    print("input:", arg_input)
    print("user:", arg_user)
    print("output:", arg_output)

    return {
        "input": arg_input,
        "user": arg_user,
        "output": arg_output,
    }

def main():
    """Run the whole annotation pipeline on one workbook and save a copy.

    Steps, in order:
      1. Pick the workbook: the path returned by
         ``settings.get_input_file_path()``; when that is falsy, the
         ``-i/--input`` command-line option; when that is empty too, the
         default name ``"Piau-Tsu-Im"``.
      2. Build the annotation sheet from the source text.
      3. Look every character up in the dictionary and fill in the readings.
      4. Produce the HTML rendition of the annotated text.
      5. Report how many characters could not be found, then save the
         workbook under ``.\\output`` using the title stored in cell C4 of
         the ``env`` sheet.
    """
    # =========================================================================
    # (1) Determine the workbook that is to be annotated.
    # =========================================================================
    file_path = settings.get_input_file_path()
    if file_path:
        convert_file_name = file_path
    else:
        print("未設定 .env 檔案")
        opts = get_cmd_input(sys.argv)
        convert_file_name = opts["input"] if opts["input"] != "" else "Piau-Tsu-Im"
    print(f"CONVERT_FILE_NAME = {convert_file_name}")

    # =========================================================================
    # (2) Build the annotation sheet from the text stored in the workbook.
    # =========================================================================
    san_sing_han_ji_tsu_im_paiau(convert_file_name)

    # =========================================================================
    # (3) Look the readings up in the dictionary and fill the annotation sheet.
    # =========================================================================
    tsa_ji_tian_tshue_tsu_im(convert_file_name)

    # =========================================================================
    # (4) Turn the annotated sheet into an HTML page.
    # =========================================================================
    hoo_gua_tsu_im(convert_file_name)

    # =========================================================================
    # Report the state of the missing-character sheet.
    # =========================================================================
    # One handle serves both the check below and the final save.
    wb = xw.Book(convert_file_name)
    source_sheet = wb.sheets["缺字表"]
    # Row number of the last used cell in column A.
    end_row_no = (
        source_sheet.range("A" + str(source_sheet.cells.last_cell.row)).end("up").row
    )
    # The lookup step records missing characters starting at row 1, and
    # end("up") yields row 1 both for an empty column and for a single entry.
    # Looking at A1 itself is what tells "none missing" from "one missing".
    has_missing = end_row_no > 1 or source_sheet.range("A1").value is not None
    if has_missing:
        print(f"總計字典查不到注音的漢字共:{end_row_no}個。")

    # =========================================================================
    # (5) Save a copy named after the article title.
    # =========================================================================
    setting_sheet = wb.sheets["env"]
    new_file_name = str(setting_sheet.range("C4").value).strip()
    new_file_path = os.path.join(
        ".\\output",
        f"【河洛話注音】{new_file_name}" + ".xlsx",
    )

    # Write the workbook out under its new name.
    wb.save(new_file_path)


if __name__ == "__main__":
    main()
4 changes: 2 additions & 2 deletions a800_漢字查廣韻擬音_SQLite.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

import sys

from mod_Query_for_Tshiat_Gu import query_tshiat_gu_siong_ji, query_tshiat_gu_ha_ji
from mod_huan_tshiat import tshu_tiau, query_tiau_ho
from mod_huan_tshiat import query_tiau_ho, tshu_tiau
from mod_Query_for_Tshiat_Gu import query_tshiat_gu_ha_ji, query_tshiat_gu_siong_ji
from mod_於字典網站查詢漢字之廣韻切語發音 import fetch_kong_un_info


Expand Down
21 changes: 21 additions & 0 deletions mod_廣韻.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,24 @@ def query_table_by_id(cursor, table_name, fields, id):

# 回傳字典列表
return dict_results


# =========================================================
# 判斷調號
# =========================================================
def piau_tiau_ho(ji_tian_piau_im):
    """Return the tone number for one dictionary reading.

    The last character of the entry's ``'清濁'`` value is joined with its
    ``'調'`` value, and the resulting two-character name (for example
    ``'清平'``) is looked up in a fixed table. Note that ``'清上'`` and
    ``'濁上'`` both map to 2.

    Args:
        ji_tian_piau_im: Mapping that provides the keys ``'清濁'`` and ``'調'``.

    Returns:
        The integer tone number stored in the table for that combination.

    Raises:
        KeyError: If a required key is absent or the combination is not in
            the table.
    """
    tiau_mia_piau = ('清平', '清上', '清去', '清入', '濁平', '濁上', '濁去', '濁入')
    tiau_ho_piau = dict(zip(tiau_mia_piau, (1, 2, 3, 4, 5, 2, 7, 8)))

    # Only the final character of the '清濁' field takes part in the lookup.
    cing_tok = ji_tian_piau_im['清濁'][-1]
    sing_tiau = ji_tian_piau_im['調']
    return tiau_ho_piau["{}{}".format(cing_tok, sing_tiau)]
Binary file modified output/Piau-Tsu-Im.xlsx
Binary file not shown.
Binary file modified output/【河洛話注音】文章標題.xlsx
Binary file not shown.
192 changes: 192 additions & 0 deletions p300_Kong_Un_Cha_Ji_Tian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import re
import sqlite3

import xlwings as xw

from mod_廣韻 import han_ji_cha_piau_im, piau_tiau_ho

# 專案全域常數
# from config_dev_env import DATABASE
DATABASE = "Kong_Un_V2.db"


def Kong_Un_Piau_Im(CONVERT_FILE_NAME):
    """Fill the annotation sheet with readings looked up in the database.

    Every cell of column A of the sheet ``漢字注音表`` is treated as one
    character. Blank cells and punctuation are skipped. For each remaining
    character the readings returned by ``han_ji_cha_piau_im`` are used:

    * no reading: the character is logged in the sheet ``缺字表``
      (columns A-C: running number, character, row in ``漢字注音表``);
    * otherwise the first reading is written to columns B-F of the same row
      (full code, initial, final, tone number, number of readings found);
    * more than one reading: every reading is also listed in the sheet
      ``字庫表`` (columns A-G: row in ``漢字注音表``, record id, character,
      full code, initial, final, tone number).

    Args:
        CONVERT_FILE_NAME: Path of the Excel workbook to process.
    """
    wb = xw.Book(CONVERT_FILE_NAME)

    # The sheet is both the source of the characters and the target of the
    # readings, so a single row counter drives reading and writing.
    han_ji_cu_im_piau = wb.sheets["漢字注音表"]
    end_of_row_no = (
        han_ji_cu_im_piau.range("A" + str(han_ji_cu_im_piau.cells.last_cell.row))
        .end("up")
        .row
    )
    # The last used row is left out of the scan.
    # NOTE(review): presumably it holds an end-of-text marker written by the
    # import step - confirm against that step.
    end_of_row_no = int(end_of_row_no) - 1
    print(f"end_row = {end_of_row_no}")

    # ----------------------------------------------------------
    # Make sure the helper sheets exist and are empty: clear a sheet that is
    # already there, create it otherwise.
    # ----------------------------------------------------------
    for sheet_name in ("缺字表", "字庫表"):
        try:
            # The lookup belongs inside the try: a missing sheet can already
            # fail here, and that failure is what triggers the creation below.
            sheet = wb.sheets[sheet_name]
            sheet.select()
            sheet.clear()
        except Exception as e:
            # The exception type for a missing sheet depends on the platform
            # backend, hence the broad catch; the message is printed rather
            # than swallowed.
            print(e)
            wb.sheets.add(name=sheet_name)

    khiam_ji_piau = wb.sheets["缺字表"]
    ji_khoo_piau = wb.sheets["字庫表"]

    han_ji_cu_im_piau.select()

    # ----------------------------------------------------------
    # Punctuation detector, compiled once for the whole scan.
    # The second class used to contain "\\u2018-\u201D"; inside a raw string
    # that is a literal backslash, "u", "2", "0", "1" and the range
    # "8"..U+201D, which matched most ASCII letters and some digits. The
    # intended range is U+2018..U+201D (curly quotation marks).
    # ----------------------------------------------------------
    piau_tiam_1 = r"[,、:;.。?!()「」【】《》“]"
    piau_tiam_2 = r"[\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F\uFE50-\uFE5E]"  # noqa: E501
    piau_tiam = re.compile(f"{piau_tiam_1}|{piau_tiam_2}", re.M | re.I)

    # ==========================================================
    # Database
    # ==========================================================
    conn = sqlite3.connect(DATABASE)
    try:
        db_cursor = conn.cursor()
        ji_khoo_index = 1  # next free row in 字庫表
        khiam_ji_index = 1  # next free row in 缺字表

        for row_no in range(1, end_of_row_no + 1):
            print(f"row = {row_no}")

            # An empty cell comes back as None; handle it explicitly so it
            # is never looked up as the text "None".
            cell_value = han_ji_cu_im_piau.range("A" + str(row_no)).value
            han_ji = "" if cell_value is None else str(cell_value).strip()

            # Blank cells (this includes line breaks, which strip() removes)
            # and punctuation carry no reading.
            if not han_ji or piau_tiam.search(han_ji):
                continue

            kong_un_piau_im = han_ji_cha_piau_im(db_cursor, han_ji)

            # =========================================================
            # No reading found: log the character in 缺字表.
            # =========================================================
            if not kong_un_piau_im:
                print(f"廣韻字典查不到此漢字:【{han_ji}】!!")
                # Running number, character, row in 漢字注音表.
                khiam_ji_piau.range("A" + str(khiam_ji_index)).value = [
                    khiam_ji_index,
                    han_ji,
                    row_no,
                ]
                khiam_ji_index += 1
                continue

            # =========================================================
            # Write the first reading next to the character.
            # =========================================================
            piau_im_tsong_soo = len(kong_un_piau_im)
            sing_bu, un_bu, tiau_ho, cu_im = _tshiah_piau_im(kong_un_piau_im[0])
            han_ji_cu_im_piau.range("B" + str(row_no)).value = [
                cu_im,
                sing_bu,
                un_bu,
                tiau_ho,
                piau_im_tsong_soo,
            ]

            # =========================================================
            # Several readings: list every one of them in 字庫表.
            # =========================================================
            if piau_im_tsong_soo > 1:
                for piau_im in kong_un_piau_im:
                    sing_bu, un_bu, tiau_ho, cu_im = _tshiah_piau_im(piau_im)
                    ji_khoo_piau.range("A" + str(ji_khoo_index)).value = [
                        row_no,  # row in 漢字注音表
                        piau_im['漢字識別號'],  # record id in the database
                        han_ji,
                        cu_im,
                        sing_bu,
                        un_bu,
                        tiau_ho,
                    ]
                    ji_khoo_index += 1
    finally:
        # Release the database even when a lookup or a sheet write fails.
        conn.close()


def _tshiah_piau_im(piau_im):
    """Split one reading into (initial, final, tone number, full code)."""
    # "Ø" in the initial column is written as "q".
    sing_bu = piau_im['上字標音'] if piau_im['上字標音'] != "Ø" else "q"
    un_bu = piau_im['下字標音']
    tiau_ho = piau_tiau_ho(piau_im)
    return sing_bu, un_bu, tiau_ho, f"{sing_bu}{un_bu}{tiau_ho}"


if __name__ == "__main__":
    CONVERT_FILE_NAME = "output\\Piau-Tsu-Im.xlsx"
    Kong_Un_Piau_Im(CONVERT_FILE_NAME)

0 comments on commit 3a0a68e

Please sign in to comment.