-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
重構專案文檔管理架構;並請查字典找漢字讀音的資料庫改用 SQLite 。
- Loading branch information
Showing
30 changed files
with
242 additions
and
41 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,3 +20,6 @@ | |
|
||
# Constants | ||
WAIT_TIME = 5 # seconds | ||
|
||
# Database | ||
DATABASE_PATH = '.\\Kong_Un.db' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# m100 建立標注音檔案 | ||
|
||
```plantuml | ||
@startuml | ||
start | ||
partition 新建活頁簿檔案 { | ||
:建立一個 Excel 活頁簿檔案; | ||
:將預設工作表的 A 欄寬度設為 128; | ||
:繼上,設定 A 欄所有儲存格,皆可自動換行; | ||
} | ||
' 自 output 子目錄、env.xlsx 活頁簿檔案,抄錄 env 工作表; | ||
partition 複製模版工作表 { | ||
:打開模版活頁簿檔案 ./output/env.xlsx; | ||
:指定此活頁簿檔案為來源檔案; | ||
:將來源檔案的工作表複製到新建活頁簿檔案中; | ||
} | ||
partition 存檔處理 { | ||
:將新建之活頁簿檔案,以 "Piau-Tsu-Im.xlsx" 為檔名儲存; | ||
} | ||
stop | ||
@enduml | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# m300 查字典標注音 | ||
|
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
import re | ||
|
||
import psycopg2 | ||
import xlwings as xw | ||
|
||
|
||
def _prepare_blank_sheet(wb, sheet_name):
    """Return the worksheet *sheet_name* of *wb*, emptied if present, created if not."""
    existing_names = {sheet.name for sheet in wb.sheets}
    if sheet_name in existing_names:
        sheet = wb.sheets[sheet_name]
        sheet.select()
        sheet.clear()
        return sheet
    return wb.sheets.add(name=sheet_name)


def main_run(CONVERT_FILE_NAME):
    """Look up the reading of every Han character listed in the workbook.

    Opens the workbook ``CONVERT_FILE_NAME`` and walks column A of the
    "漢字注音表" sheet, one cell per row.  For each cell that is neither
    empty nor punctuation, the ``han_ji_dict`` table is queried (ordered by
    ``freq`` descending) and the first record is written to columns B-G of
    the same row: reading, initial, final, tone, number of readings found,
    and frequency.

    Two helper sheets are (re)built on every run:

    * "缺字表": one row per character with no record in the database
      (sequence number, character, row number in "漢字注音表").
    * "字庫表": for characters with more than one reading, one row per
      reading (row number in "漢字注音表", record id, character, reading,
      initial, final, tone, frequency).

    Args:
        CONVERT_FILE_NAME: Path of the Excel workbook to process.

    The workbook is neither saved nor closed here.
    """
    wb = xw.Book(CONVERT_FILE_NAME)

    # Source / target sheet and the number of rows to process.
    han_ji_tsu_im_piau = wb.sheets["漢字注音表"]
    last_used_row = (
        han_ji_tsu_im_piau.range("A" + str(han_ji_tsu_im_piau.cells.last_cell.row))
        .end("up")
        .row
    )
    # NOTE(review): the last used row is deliberately left out, as in the
    # original code; presumably it holds an end marker -- confirm.
    end_of_row_no = int(last_used_row) - 1
    print(f"end_row = {end_of_row_no}")

    # Helper sheets: cleared when they already exist, added otherwise.
    khiam_ji_piau = _prepare_blank_sheet(wb, "缺字表")
    ji_khoo_piau = _prepare_blank_sheet(wb, "字庫表")

    han_ji_tsu_im_piau.select()

    # Punctuation is skipped without a database lookup.  The pattern is
    # compiled once, outside the loop.  The second class lists the quote
    # range as \u2018-\u201D; the former spelling "\\u2018-\u201D" produced
    # the accidental range "8"-U+201D, which swallowed Latin letters and the
    # digits 8 and 9.
    piau_tiam_1 = r"[,、:;.。?!()「」【】《》“]"
    piau_tiam_2 = r"[\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F\uFE50-\uFE5E]"  # noqa: E501
    piau_tiam = re.compile(f"{piau_tiam_1}|{piau_tiam_2}")

    # The character is passed as a bound parameter, never formatted into the
    # SQL text, so quotes in a cell cannot break or alter the query.
    sql = (
        "SELECT id, han_ji, chu_im, freq, siann, un, tiau "
        "FROM han_ji_dict "
        "WHERE han_ji = %s "
        "ORDER BY freq DESC;"
    )

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    conn = psycopg2.connect(
        database="Ho_Lok_Ue", user="postgres", password="ChingHai99@", host="127.0.0.1", port="5432"
    )
    try:
        db_cursor = conn.cursor()
        ji_khoo_index = 1  # next free row in "字庫表"
        khiam_ji_index = 1  # next free row in "缺字表"

        # Results are written back to the row the character was read from.
        for row_no in range(1, end_of_row_no + 1):
            print(f"row = {row_no}")

            # Character whose reading is to be looked up.
            cell_value = han_ji_tsu_im_piau.range("A" + str(row_no)).value
            # An empty cell comes back as None; str(None) would otherwise
            # be looked up as the text "None".
            beh_tshue_tsu_im_e_ji = (
                "" if cell_value is None else str(cell_value).strip()
            )

            # Blank cells (including whitespace / line breaks) are skipped.
            if not beh_tshue_tsu_im_e_ji:
                continue

            # Punctuation is skipped.
            if piau_tiam.search(beh_tshue_tsu_im_e_ji):
                continue

            db_cursor.execute(sql, (beh_tshue_tsu_im_e_ji,))
            ji_e_piau_im = db_cursor.fetchall()

            # No record: log the character in "缺字表" as
            # (sequence number, character, row number in "漢字注音表").
            if not ji_e_piau_im:
                print(f"Can not find 【{beh_tshue_tsu_im_e_ji}】in Han-Ji-Khoo!!")
                khiam_ji_piau.range("A" + str(khiam_ji_index)).value = [
                    khiam_ji_index,
                    beh_tshue_tsu_im_e_ji,
                    row_no,
                ]
                khiam_ji_index += 1
                continue

            # The first record is the one with the highest freq.
            piau_im_tsong_soo = len(ji_e_piau_im)
            _, _, tsu_im, freq, siann_bu, un_bu, tiau_ho = ji_e_piau_im[0]

            # A flat list assigned to a cell fills the row to the right: B..G.
            han_ji_tsu_im_piau.range("B" + str(row_no)).value = [
                tsu_im,
                siann_bu,
                un_bu,
                tiau_ho,
                piau_im_tsong_soo,
                freq,
            ]

            # More than one reading: list every one of them in "字庫表".
            if piau_im_tsong_soo > 1:
                for record in ji_e_piau_im:
                    han_ji_id, _, tsu_im, freq, siann_bu, un_bu, tiau_ho = record
                    # Columns A..H of the next free row.
                    ji_khoo_piau.range("A" + str(ji_khoo_index)).value = [
                        row_no,
                        han_ji_id,
                        beh_tshue_tsu_im_e_ji,
                        tsu_im,
                        siann_bu,
                        un_bu,
                        tiau_ho,
                        freq,
                    ]
                    ji_khoo_index += 1
    finally:
        # Release the database connection even when a lookup or a
        # worksheet write fails part-way through.
        conn.close()