Skip to content

Commit

Permalink
變更漢字標音出處,取自 "漢字檢視" 之 "漢字標音" 欄。然後據此切分出:聲、韻、調。
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Apr 19, 2024
1 parent d8e2175 commit 1086664
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 46 deletions.
Binary file modified Documents/D300_廣韻_河洛話篇.xlsx
Binary file not shown.
Binary file modified Kong_Un_V2.db
Binary file not shown.
99 changes: 70 additions & 29 deletions mod_廣韻.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import sqlite3


Expand Down Expand Up @@ -331,46 +332,86 @@ def init_un_bu_dict(cursor):
return un_bu_dict


# ==========================================================
# Split a Han character's phonetic code (cu-im) into its
# initial (sing-bu), final (un-bu) and tone number (tiau).
# ==========================================================
# Compiled once at import time instead of on every call.  Two-letter initials
# are listed before their one-letter prefixes (ch before c, kh before k,
# ng before n, ...) so the longer spelling is tried first.
_SING_BU_PATTERN = re.compile(r"(b|ch|c|g|h|j|kh|k|l|m|ng|n|ph|p|s|th|t|Ø)")


def split_cu_im(cu_im):
    """Split a phonetic code such as ``'siu2'`` into ``[initial, final, tone]``.

    The initial is whatever the initial pattern matches at the start of the
    string, the tone is the last character, and the final is everything in
    between (it may be the empty string, e.g. ``'m2'`` -> ``['m', '', '2']``).

    Args:
        cu_im: The phonetic code to split.

    Returns:
        A three-element list ``[sing_bu, un_bu, tiau]`` of strings.  When the
        code cannot be split -- it is not a string, it does not start with a
        recognised initial, or nothing is left after the initial to serve as
        the tone -- ``[None, None, None]`` is returned so that callers can
        detect the problem by testing the parts against ``None``.
    """
    unparsable = [None, None, None]

    # None or any other non-string value cannot be matched by the regex.
    if not isinstance(cu_im, str):
        return unparsable

    matched = _SING_BU_PATTERN.match(cu_im)
    if matched is None:
        # No recognised initial at the start (this also covers "").
        return unparsable

    sing_bu = matched.group()
    if len(cu_im) <= len(sing_bu):
        # Only an initial was supplied; without this guard the last letter of
        # the initial would be reported a second time as the tone.
        return unparsable

    un_bu = cu_im[len(sing_bu):-1]
    tiau = cu_im[-1]
    return [sing_bu, un_bu, tiau]


if __name__ == "__main__":
sing_bu_dict = init_sing_bu_dict()
sing_code = 'c'
# sing_bu_dict = init_sing_bu_dict()
# sing_code = 'c'

sing_bu_tl = sing_bu_dict[sing_code]['tl']
assert sing_bu_tl == 'tsh', "轉換錯誤!"
# sing_bu_tl = sing_bu_dict[sing_code]['tl']
# assert sing_bu_tl == 'tsh', "轉換錯誤!"

sing_bu_ipa = sing_bu_dict[sing_code]['ipa']
assert sing_bu_ipa == 'ʦʰ', "轉換錯誤!"
# sing_bu_ipa = sing_bu_dict[sing_code]['ipa']
# assert sing_bu_ipa == 'ʦʰ', "轉換錯誤!"

sing_bu_poj = sing_bu_dict[sing_code]['poj']
assert sing_bu_poj == 'chh', "轉換錯誤!"
# sing_bu_poj = sing_bu_dict[sing_code]['poj']
# assert sing_bu_poj == 'chh', "轉換錯誤!"

sing_bu_bp = sing_bu_dict[sing_code]['bp']
assert sing_bu_bp == 'c', "轉換錯誤!"
# sing_bu_bp = sing_bu_dict[sing_code]['bp']
# assert sing_bu_bp == 'c', "轉換錯誤!"

sing_bu_tps = sing_bu_dict[sing_code]['tps']
assert sing_bu_tps == 'ㄘ', "轉換錯誤!"
# sing_bu_tps = sing_bu_dict[sing_code]['tps']
# assert sing_bu_tps == 'ㄘ', "轉換錯誤!"

sing_bu_sni = sing_bu_dict[sing_code]['sni']
assert sing_bu_sni == '出', "轉換錯誤!"
# sing_bu_sni = sing_bu_dict[sing_code]['sni']
# assert sing_bu_sni == '出', "轉換錯誤!"

#--------------------------------------------------
un_bu_dict = init_un_bu_dict()
un_code = 'ee'
# #--------------------------------------------------
# un_bu_dict = init_un_bu_dict()
# un_code = 'ee'

# un_bu_tl = un_bu_dict[un_code]['tl']
# assert un_bu_tl == 'ee', "轉換錯誤!"

un_bu_tl = un_bu_dict[un_code]['tl']
assert un_bu_tl == 'ee', "轉換錯誤!"
# un_bu_ipa = un_bu_dict[un_code]['ipa']
# assert un_bu_ipa == 'ɛ', "轉換錯誤!"

un_bu_ipa = un_bu_dict[un_code]['ipa']
assert un_bu_ipa == 'ɛ', "轉換錯誤!"
# un_bu_poj = un_bu_dict[un_code]['poj']
# assert un_bu_poj == 'e', "轉換錯誤!"

un_bu_poj = un_bu_dict[un_code]['poj']
assert un_bu_poj == 'e', "轉換錯誤!"
# un_bu_bp = un_bu_dict[un_code]['bp']
# assert un_bu_bp == 'e', "轉換錯誤!"

un_bu_bp = un_bu_dict[un_code]['bp']
assert un_bu_bp == 'e', "轉換錯誤!"
# un_bu_tps = un_bu_dict[un_code]['tps']
# assert un_bu_tps == '', "轉換錯誤!"

un_bu_tps = un_bu_dict[un_code]['tps']
assert un_bu_tps == '', "轉換錯誤!"
# un_bu_sni = un_bu_dict[un_code]['sni']
# assert un_bu_sni == '', "轉換錯誤!"

un_bu_sni = un_bu_dict[un_code]['sni']
assert un_bu_sni == '嘉', "轉換錯誤!"
#--------------------------------------------------
# Self-test for split_cu_im(): each case passes one phonetic code and checks
# the returned [initial, final, tone] list.  Every case is labelled with the
# Han character and the code under test.
#--------------------------------------------------
# 受 [siu2]
han_ji_piau_im = 'siu2'
result = split_cu_im(han_ji_piau_im)
print(result)
assert result == ['s', 'iu', '2'], "轉換錯誤!"
# assert result == ['s', 'iu', '3'], "轉換錯誤!"

# 衣 [Øi1] -- Ø is accepted by the initial pattern like any other initial
han_ji_piau_im = 'Øi1'
result = split_cu_im(han_ji_piau_im)
assert result == ['Ø', 'i', '1'], "轉換錯誤!"

# 州 [ciu1] -- one-letter initial 'c'
han_ji_piau_im = 'ciu1'
result = split_cu_im(han_ji_piau_im)
assert result == ['c', 'iu', '1'], "轉換錯誤!"

# 此 -- two-letter initial 'ch' must not be cut short to 'c'
# NOTE(review): this comment originally read [chu2], but the input below is
# 'chi2' -- confirm which spelling is intended.
han_ji_piau_im = 'chi2'
result = split_cu_im(han_ji_piau_im)
assert result == ['ch', 'i', '2'], "轉換錯誤!"
Binary file modified output/Piau-Tsu-Im.xlsx
Binary file not shown.
40 changes: 23 additions & 17 deletions p501_Kong_Un_Cha_Ji_Tian.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,7 @@

import xlwings as xw

from mod_廣韻 import han_ji_cha_piau_im, piau_tiau_ho

# import sqlite3



# 專案全域常數
# from config_dev_env import DATABASE
# DATABASE = "Kong_Un_V2.db"
from mod_廣韻 import han_ji_cha_piau_im, piau_tiau_ho, split_cu_im


def Kong_Un_Piau_Im(CONVERT_FILE_NAME, db_cursor):
Expand Down Expand Up @@ -136,14 +128,19 @@ def Kong_Un_Piau_Im(CONVERT_FILE_NAME, db_cursor):
piau_im_tsong_soo = len(kong_un_piau_im)
piau_im = kong_un_piau_im[0]
han_ji_id = piau_im['漢字識別號']
# sing_bu = piau_im['上字標音'] if piau_im['上字標音'] != "Ø" else "q"

# =========================================================
# 若是漢字注音碼有異常狀況,在【缺字表】做記錄
# =========================================================
sing_bu = None; un_bu = None; tiau_ho = None; cu_im = None

if piau_im['聲母碼'] == None or piau_im['韻母碼'] == None:
piau_im_list = split_cu_im(piau_im['漢字標音'])
sing_bu = piau_im_list[0]
un_bu = piau_im_list[1]
tiau_ho = piau_im_list[2]
piau_im_str = [str(i) for i in piau_im_list] # 將 list 中所有元素轉為 string
cu_im = ''.join(piau_im_str) # 將 list 中所有元素合併為一個 string

if sing_bu == None or un_bu == None or tiau_ho == None:
print(f"廣韻字典的漢字:【{beh_piau_im_e_han_ji}】有缺聲母/韻母/調號異常之問題!!")
# 記錄【缺字表】的【列號】
khiam_ji_piau.range("A" + str(khiam_ji_index)).value = khiam_ji_index
Expand All @@ -152,11 +149,20 @@ def Kong_Un_Piau_Im(CONVERT_FILE_NAME, db_cursor):
# 記錄【漢字注音表】的【列號】
khiam_ji_piau.range("C" + str(khiam_ji_index)).value = source_index
khiam_ji_index += 1
else:
sing_bu = piau_im['聲母碼']
un_bu = piau_im['韻母碼']
tiau_ho = piau_tiau_ho(piau_im)
cu_im = f"{sing_bu}{un_bu}{tiau_ho}"
# if piau_im['聲母碼'] == None or piau_im['韻母碼'] == None:
# print(f"廣韻字典的漢字:【{beh_piau_im_e_han_ji}】有缺聲母/韻母/調號異常之問題!!")
# # 記錄【缺字表】的【列號】
# khiam_ji_piau.range("A" + str(khiam_ji_index)).value = khiam_ji_index
# # 記錄【缺字表】的【漢字】
# khiam_ji_piau.range("B" + str(khiam_ji_index)).value = beh_piau_im_e_han_ji
# # 記錄【漢字注音表】的【列號】
# khiam_ji_piau.range("C" + str(khiam_ji_index)).value = source_index
# khiam_ji_index += 1
# else:
# sing_bu = piau_im['聲母碼']
# un_bu = piau_im['韻母碼']
# tiau_ho = piau_tiau_ho(piau_im)
# cu_im = f"{sing_bu}{un_bu}{tiau_ho}"

# =========================================================
# 寫入:【漢字注音表】
Expand Down

0 comments on commit 1086664

Please sign in to comment.