-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
11,010 additions
and
2 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
import re | ||
|
||
import sqlite3 | ||
import xlwings as xw | ||
|
||
# 專案全域常數 | ||
from config_dev_env import DATABASE | ||
|
||
def main_run(CONVERT_FILE_NAME):
    """Annotate every character in the "漢字注音表" sheet with its phonetic code.

    Opens the workbook ``CONVERT_FILE_NAME`` through xlwings and walks
    column A of the "漢字注音表" sheet from row 1.  For every cell that is
    neither empty nor punctuation, the ``雅俗通字庫`` table of the SQLite
    database ``DATABASE`` is queried for rows whose ``漢字`` equals the cell
    text, ordered by ``常用度`` descending.

    * Found: the first (highest ``常用度``) reading is written to columns
      B-G of the same row as 拼音碼, 聲, 韻, 調, number of readings, 常用度.
    * Found with more than one reading: every reading is appended to the
      "字庫表" sheet together with the source row number.
    * Not found: the character and its source row number are appended to
      the "缺字表" sheet.

    Args:
        CONVERT_FILE_NAME: Path of the Excel workbook to process; passed
            unchanged to ``xw.Book``.

    Returns:
        None.  All results are written into the open workbook; the workbook
        is neither saved nor closed here.
    """
    wb = xw.Book(CONVERT_FILE_NAME)

    # Source sheet and the number of rows to process.
    source_sheet = wb.sheets["漢字注音表"]
    last_used_row = (
        source_sheet.range("A" + str(source_sheet.cells.last_cell.row)).end("up").row
    )
    # NOTE(review): the last used row of column A is deliberately excluded,
    # as in the original code; presumably it holds a terminator - confirm.
    end_of_row_no = int(last_used_row) - 1
    print(f"end_row = {end_of_row_no}")

    # ----------------------------------------------------------
    # Prepare the helper sheets: clear them when they already exist,
    # otherwise create them.
    # ----------------------------------------------------------
    for sheet_name in ("缺字表", "字庫表"):
        try:
            # The lookup itself may raise when the sheet is missing, so it
            # has to live inside the try block.
            sheet = wb.sheets[sheet_name]
            sheet.select()
            sheet.clear()
        except Exception as e:
            # The concrete exception type depends on the xlwings backend,
            # hence the broad clause; any failure is treated as
            # "sheet does not exist yet".
            print(e)
            wb.sheets.add(name=sheet_name)

    khiam_ji_piau = wb.sheets["缺字表"]  # characters missing from the database
    ji_khoo_piau = wb.sheets["字庫表"]  # characters with several readings

    # Results are written back into the same sheet the characters come from.
    han_ji_tsu_im_piau = wb.sheets["漢字注音表"]
    han_ji_tsu_im_piau.select()

    # Punctuation detection, compiled once.  The second class originally
    # contained "\\u2018-\u201D", which in a raw string is a literal
    # backslash followed by the range "8".."\u201D" and therefore matched
    # most ASCII/Latin text; the intended range is the curly quotes
    # \u2018-\u201D.
    piau_tiam_1 = r"[,、:;.。?!()「」【】《》“]"
    piau_tiam_2 = r"[\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F\uFE50-\uFE5E]"  # noqa: E501
    piau_tiam_pattern = re.compile(f"{piau_tiam_1}|{piau_tiam_2}")

    # Only the columns that are actually used are selected, so the tuple
    # positions cannot drift out of sync with the column list.  The
    # character is bound as a parameter instead of being pasted into the
    # SQL text.
    sql = (
        "SELECT 識別號, 拼音碼, 聲, 韻, 調, 常用度 "
        "FROM 雅俗通字庫 "
        "WHERE 漢字=? "
        "ORDER BY 常用度 DESC;"
    )

    conn = sqlite3.connect(DATABASE)
    try:
        db_cursor = conn.cursor()
        ji_khoo_index = 1  # next free row in "字庫表"
        khiam_ji_index = 1  # next free row in "缺字表"

        # Source and target are the same sheet and always advance together,
        # so a single row counter is enough.
        for row_no in range(1, end_of_row_no + 1):
            print(f"row = {row_no}")

            # The character whose phonetic code is to be looked up.
            cell_value = source_sheet.range("A" + str(row_no)).value
            if cell_value is None:
                # Empty cell: str(None) would otherwise become "None".
                continue
            beh_tshue_tsu_im_e_ji = str(cell_value).strip()
            if not beh_tshue_tsu_im_e_ji:
                # Whitespace-only cell (blank or line break): nothing to do.
                continue

            # Cells containing punctuation are skipped.
            if piau_tiam_pattern.search(beh_tshue_tsu_im_e_ji):
                continue

            # Look the character up in the database.
            db_cursor.execute(sql, (beh_tshue_tsu_im_e_ji,))
            ji_e_piau_im = db_cursor.fetchall()

            # Not found: record sequence number, character and source row
            # in "缺字表" (columns A-C).
            if not ji_e_piau_im:
                print(f"Can not find 【{beh_tshue_tsu_im_e_ji}】in Han-Ji-Khoo!!")
                khiam_ji_piau.range("A" + str(khiam_ji_index)).value = [
                    khiam_ji_index,
                    beh_tshue_tsu_im_e_ji,
                    row_no,
                ]
                khiam_ji_index += 1
                continue

            # Most common reading -> columns B-G of the source row.
            piau_im_tsong_soo = len(ji_e_piau_im)
            _, tsu_im, siann_bu, un_bu, tiau_ho, freq = ji_e_piau_im[0]
            han_ji_tsu_im_piau.range("B" + str(row_no)).value = [
                tsu_im,
                siann_bu,
                un_bu,
                tiau_ho,
                piau_im_tsong_soo,
                freq,
            ]

            # Several readings: list all of them in "字庫表" (columns A-H:
            # source row, record id, character, 拼音碼, 聲, 韻, 調, 常用度).
            if piau_im_tsong_soo > 1:
                for han_ji_id, tsu_im, siann_bu, un_bu, tiau_ho, freq in ji_e_piau_im:
                    ji_khoo_piau.range("A" + str(ji_khoo_index)).value = [
                        row_no,
                        han_ji_id,
                        beh_tshue_tsu_im_e_ji,
                        tsu_im,
                        siann_bu,
                        un_bu,
                        tiau_ho,
                        freq,
                    ]
                    ji_khoo_index += 1
    finally:
        # Release the database even when a lookup or a sheet write fails.
        conn.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
識別號,韻母碼,國際音標,白話字,台羅,閩拼,方音,十五音,十五音序,舒促聲,十五音識別碼 | ||
1,un,un,un,un,un,ㄨㄣ,君,1,舒聲,君舒 | ||
2,ut,ut̚,ut,ut,ut,ㄨㆵ,君,1,促聲,君促 | ||
3,ian,ian,ian,ian,ian,ㄧㄢ,堅,2,舒聲,堅舒 | ||
4,iat,iat̚,iat,iat,iat,ㄧㄚㆵ,堅,2,促聲,堅促 | ||
5,im,im,im,im,im,ㄧㆬ,金,3,舒聲,金舒 | ||
6,ip,ip̚,ip,ip,ip,ㄧㆴ,金,3,促聲,金促 |
7,ui,ui,ui,ui,ui,ㄨㄧ,規,4,舒聲,規舒 | ||
8,ee,ɛ,e,ee,e,ㄝ,嘉,5,舒聲,嘉舒 | ||
9,eeh,ɛ?,eeh,eeh,,ㄝㆷ,嘉,5,促聲,嘉促 | ||
10,an,an,an,an,an,ㄢ,干,6,舒聲,干舒 | ||
11,at,at̚,at,at,at,ㄚㆵ,干,6,促聲,干促 | ||
12,ong,ɔŋ,ong,ong,ong,ㆲ,公,7,舒聲,公舒 | ||
13,ok,ɔk̚,ok,ok,ok,ㆦㆻ,公,7,促聲,公促 | ||
14,uai,uai,oai,uai,uai,ㄨㄞ,乖,8,舒聲,乖舒 | ||
15,uaih,uai?,oaih,uaih,uaih,ㄨㄞㆷ,乖,8,促聲,乖促 | ||
16,ing,iŋ,eng,ing,ing,ㄧㄥ,經,9,舒聲,經舒 | ||
17,ik,ik̚,ek,ik,ik,ㄧㆻ,經,9,促聲,經促 | ||
18,uan,uan,oan,uan,uan,ㄨㄢ,觀,10,舒聲,觀舒 | ||
19,uat,uat̚,oat,uat,uat,ㄨㄚㆵ,觀,10,促聲,觀促 | ||
20,oo,ɔu,o͘,oo,oo,ㆦ,沽,11,舒聲,沽舒 | ||
21,iau,iau,iau,iau,iao,ㄧㄠ,嬌,12,舒聲,嬌舒 | ||
22,iauh,iau?,iauh,iauh,iaoh,ㄧㄠㆷ,嬌,12,促聲,嬌促 | ||
23,ei,ei,e,e,e,ㆤ,稽,13,舒聲,稽舒 | ||
24,iong,iɔŋ,iong,iong,iong,ㄧㆲ,恭,14,舒聲,恭舒 | ||
25,iok,iɔk̚,iok,iok,iok,ㄧㆦㆻ,恭,14,促聲,恭促 | ||
26,o,ə,o,o,o,ㄜ,高,15,舒聲,高舒 | ||
27,oh,ə?,oh,oh,oh,ㄜㆷ,高,15,促聲,高促 | ||
28,ai,ai,ai,ai,ai,ㄞ,皆,16,舒聲,皆舒 | ||
29,in,in,in,in,in,ㄧㄣ,巾,17,舒聲,巾舒 | ||
30,it,it̚,it,it,it,ㄧㆵ,巾,17,促聲,巾促 | ||
31,iang,iaŋ,iang,iang,iang,ㄧㄤ,姜,18,舒聲,姜舒 | ||
32,iak,iak̚,iak,iak,iak,ㄧㄚㆻ,姜,18,促聲,姜促 | ||
33,am,am,am,am,am,ㆰ,甘,19,舒聲,甘舒 | ||
34,ap,ap̚,ap,ap,ap,ㄚㆴ,甘,19,促聲,甘促 | ||
35,ua,ua,oa,ua,ua,ㄨㄚ,瓜,20,舒聲,瓜舒 | ||
36,uah,ua?,oah,uah,uah,ㄨㄚㆷ,瓜,20,促聲,瓜促 | ||
37,ang,aŋ,ang,ang,ang,ㄤ,江,21,舒聲,江舒 | ||
38,ak,ak̚,ak,ak,ak,ㄚㆻ,江,21,促聲,江促 | ||
39,iam,iam,iam,iam,iam,ㄧㆰ,兼,22,舒聲,兼舒 | ||
40,iap,iap̚,iap,iap,iap,ㄧㄚㆴ,兼,22,促聲,兼促 | ||
41,au,au,au,au,ao,ㄠ,交,23,舒聲,交舒 | ||
42,auh,au?,auh,auh,aoh,ㄠㆷ,交,23,促聲,交促 | ||
43,ia,ia,ia,ia,ia,ㄧㄚ,迦,24,舒聲,迦舒 | ||
44,iah,ia?,iah,iah,iah,ㄧㄚㆷ,迦,24,促聲,迦促 | ||
45,ue,ue,oe,ue,ue,ㄨㆤ,檜,25,舒聲,檜舒 | ||
46,ueh,ue?,oeh,ueh,ueh,ㄨㆤㆷ,檜,25,促聲,檜促 | ||
47,ann,ã,aⁿ,ann,na,ㆩ,監,26,舒聲,監舒 | ||
48,ahnn,ã?,aⁿh,annh,nah,ㆩㆷ,監,26,促聲,監促 | ||
49,u,u,u,u,u,ㄨ,艍,27,舒聲,艍舒 | ||
50,uh,u?,uh,uh,uh,ㄨㆷ,艍,27,促聲,艍促 | ||
51,a,a,a,a,a,ㄚ,膠,28,舒聲,膠舒 | ||
52,ah,a?,ah,ah,ah,ㄚㆷ,膠,28,促聲,膠促 | ||
53,i,i,i,i,i,ㄧ,居,29,舒聲,居舒 | ||
54,ih,i?,ih,ih,ih,ㄧㆷ,居,29,促聲,居促 | ||
55,iu,iu,iu,iu,iu,ㄧㄨ,丩,30,舒聲,丩舒 | ||
56,enn,ẽ,eⁿ,enn,ne,ㆥ,更,31,舒聲,更舒 | ||
57,ehnn,ẽ?,eⁿh,ennh,neh,ㆥㆷ,更,31,促聲,更促 | ||
58,uinn,uĩ,uiⁿ,uinn,nui,ㄨㆪ,褌,32,舒聲,褌舒 | ||
59,io,iə,io,io,io,ㄧㄜ,茄,33,舒聲,茄舒 | ||
60,ioh,iə?,ioh,ioh,ioh,ㄧㄜㆷ,茄,33,促聲,茄促 | ||
61,inn,ĩ,iⁿ,inn,ni,ㆪ,梔,34,舒聲,梔舒 | ||
62,ihnn,ĩ?,iⁿh,innh,nih,ㆪㆷ,梔,34,促聲,梔促 | ||
63,ionn,ĩɔ̃,ioⁿ,ionn,nioo,ㄧㆧ,薑,35,舒聲,薑舒 | ||
64,iann,iã,iaⁿ,iann,nia,ㄧㆩ,驚,36,舒聲,驚舒 |
65,uann,ũã,oaⁿ,uann,nua,ㄨㆩ,官,37,舒聲,官舒 | ||
66,ng,ŋ̍,ng,ng,ng,ㆭ,鋼,38,舒聲,鋼舒 | ||
67,e,e,e,e,e,ㆤ,伽,39,舒聲,伽舒 | ||
68,eh,e?,eh,eh,eh,ㆤㆷ,伽,39,促聲,伽促 | ||
69,ainn,ãĩ,aiⁿ,ainn,nai,ㆮ,閒,40,舒聲,閒舒 | ||
70,oonn,ɔ̃ũ,oⁿ,onn,noo,ㆧ,姑,41,舒聲,姑舒 | ||
71,m,m̩,m,m,m,ㆬ,姆,42,舒聲,姆舒 | ||
72,uang,uaŋ,oang,uang,uang,ㄨㄤ,光,43,舒聲,光舒 | ||
73,uak,uak̚,oak,uak,uak,ㄨㄚㆻ,光,43,促聲,光促 | ||
74,uainn,uãĩ,oaiⁿ,uainn,nuai,ㄨㆮ,閂,44,舒聲,閂舒 | ||
75,uaihnn,uãĩ?,oaiⁿh,uainnh,nuaih,ㄨㆮㆷ,閂,44,促聲,閂促 | ||
76,uenn,uẽ,oeⁿ,uenn,nue,ㄨㆥ,糜,45,舒聲,糜舒 | ||
77,iaunn,ĩãũ,iauⁿ,iaunn,niao,ㄧㆯ,嘄,46,舒聲,嘄舒 | ||
78,iauhnn,ĩãũ?,iauⁿh,iaunnh,niaoh,ㄧㆯㆷ,嘄,46,促聲,嘄促 | ||
79,om,ɔm,om,om,om,ㆱ,箴,47,舒聲,箴舒 | ||
80,op,ɔp̚,op,op,op,ㆦㆴ,箴,47,促聲,箴促 | ||
81,aunn,ãũ,auⁿ,aunn,nao,ㆯ,爻,48,舒聲,爻舒 | ||
82,onn,õ,oⁿ,onn,noo,ㆧ,扛,49,舒聲,扛舒 | ||
83,ohnn,õ?,oⁿh,onnh,nooh,ㆧㆷ,扛,49,促聲,扛促 |
84,iunn,iũ,iuⁿ,iunn,niu,ㄧㆫ,牛,50,舒聲,牛舒 |
Binary file not shown.
Oops, something went wrong.