-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
11,015 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
識別號,十五音字母,國際音標,台語音標,方音符號,白話字,台羅拼音,閩拼,備註 | ||
1,柳,l,l,ㄌ,l,l,l, | ||
2,邊,p,p,ㄅ,p,p,b, | ||
3,求,k,k,ㄍ,k,k,g, | ||
4,去,kʰ,kh,ㄎ,kh,kh,k, | ||
5,地,t,t,ㄉ,t,t,d, | ||
6,頗,pʰ,ph,ㄆ,ph,ph,p, | ||
7,他,tʰ,th,ㄊ,th,th,t, | ||
8,曾,ʦ,z,ㄗ,ch,ts,z, | ||
9,入,ʣ,j,ㆡ,j,j,zz, | ||
10,時,s,s,ㄙ,s,s,s, | ||
11,英,,q, ,,,, | ||
12,門,b,b,ㆠ,b,b,bb, | ||
13,語,ɡ,g,ㆣ,g,g,gg, | ||
14,出,ʦʰ,c,ㄘ,chh,tsh,c, | ||
15,喜,h,h,ㄏ,h,h,h, | ||
16,毛,m,m,ㄇ,m,m,bbn,b/m 本不分,此處分門/毛 | ||
17,耐,n,n,ㄋ,n,n,ln,l/n 本不分,此處分柳/耐 | ||
18,雅,ŋ,ng,ㄫ,ng,ng,ggn,g/ng 本不分,此處分語/雅 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
識別號,聲調,四聲調,舒促聲,台羅八聲調 | ||
1,上平,平,舒,1 | ||
2,上上,上,舒,2 | ||
3,上去,去,舒,3 | ||
4,上入,入,促,4 | ||
5,下平,平,舒,5 | ||
6,下上,上,舒,6 | ||
7,下去,去,舒,7 | ||
8,下入,入,促,8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
識別號,韻母編碼,十五音字母,韻母序,舒促,國際音標,台語音標,方音符號,白話字,台羅拼音,閩拼 | ||
1,君舒,君,1,舒,un,un,ㄨㄣ,un,un,un | ||
2,君促,君,1,促,ut̚,ut,ㄨㆵ,ut,ut,ut | ||
3,堅舒,堅,2,舒,ian,ian,ㄧㄢ,ian,ian,ian | ||
4,堅促,堅,2,促,iat̚,iat,ㄧㄚㆵ,iat,iat,iat | ||
5,金舒,金,3,舒,im,im,ㄧㆬ,im,im,im | ||
6,金促,金,3,促,ip̚,ip,ㄧㆴ,ip,ip,ip | ||
7,規舒,規,4,舒,ui,ui,ㄨㄧ,ui,ui,ui | ||
8,規促,規,4,促,,,,,, | ||
9,嘉舒,嘉,5,舒,ɛ,ee,ㄝ,e,ee,e | ||
10,嘉促,嘉,5,促,ɛ?,eeh,ㄝㆷ,eeh,eeh, | ||
11,干舒,干,6,舒,an,an,ㄢ,an,an,an | ||
12,干促,干,6,促,at̚,at,ㄚㆵ,at,at,at | ||
13,公舒,公,7,舒,ɔŋ,ong,ㆲ,ong,ong,ong | ||
14,公促,公,7,促,ɔk̚,ok,ㆦㆻ,ok,ok,ok | ||
15,乖舒,乖,8,舒,uai,uai,ㄨㄞ,oai,uai,uai | ||
16,乖促,乖,8,促,uai?,uaih,ㄨㄞㆷ,oaih,uaih,uaih | ||
17,經舒,經,9,舒,iŋ,ing,ㄧㄥ,eng,ing,ing | ||
18,經促,經,9,促,ik̚,ik,ㄧㆻ,ek,ik,ik | ||
19,觀舒,觀,10,舒,uan,uan,ㄨㄢ,oan,uan,uan | ||
20,觀促,觀,10,促,uat̚,uat,ㄨㄚㆵ,oat,uat,uat | ||
21,沽舒,沽,11,舒,ɔu,oo,ㆦ,o͘,oo,oo | ||
22,沽促,沽,11,促,,,,,, | ||
23,嬌舒,嬌,12,舒,iau,iau,ㄧㄠ,iau,iau,iao | ||
24,嬌促,嬌,12,促,iau?,iauh,ㄧㄠㆷ,iauh,iauh,iaoh | ||
25,稽舒,稽,13,舒,ei,ei,ㆤ,e,e,e | ||
26,稽促,稽,13,促,,,,,, | ||
27,恭舒,恭,14,舒,iɔŋ,iong,ㄧㆲ,iong,iong,iong | ||
28,恭促,恭,14,促,iɔk̚,iok,ㄧㆦㆻ,iok,iok,iok | ||
29,高舒,高,15,舒,ə,o,ㄜ,o,o,o | ||
30,高促,高,15,促,ə?,oh,ㄜㆷ,oh,oh,oh | ||
31,皆舒,皆,16,舒,ai,ai,ㄞ,ai,ai,ai | ||
32,皆促,皆,16,促,,,,,, | ||
33,巾舒,巾,17,舒,in,in,ㄧㄣ,in,in,in | ||
34,巾促,巾,17,促,it̚,it,ㄧㆵ,it,it,it | ||
35,姜舒,姜,18,舒,iaŋ,iang,ㄧㄤ,iang,iang,iang | ||
36,姜促,姜,18,促,iak̚,iak,ㄧㄚㆻ,iak,iak,iak | ||
37,甘舒,甘,19,舒,am,am,ㆰ,am,am,am | ||
38,甘促,甘,19,促,ap̚,ap,ㄚㆴ,ap,ap,ap | ||
39,瓜舒,瓜,20,舒,ua,ua,ㄨㄚ,oa,ua,ua | ||
40,瓜促,瓜,20,促,ua?,uah,ㄨㄚㆷ,oah,uah,uah | ||
41,江舒,江,21,舒,aŋ,ang,ㄤ,ang,ang,ang | ||
42,江促,江,21,促,ak̚,ak,ㄚㆻ,ak,ak,ak | ||
43,兼舒,兼,22,舒,iam,iam,ㄧㆰ,iam,iam,iam | ||
44,兼促,兼,22,促,iap̚,iap,ㄧㄚㆴ,iap,iap,iap | ||
45,交舒,交,23,舒,au,au,ㄠ,au,au,ao | ||
46,交促,交,23,促,au?,auh,ㄠㆷ,auh,auh,aoh | ||
47,迦舒,迦,24,舒,ia,ia,ㄧㄚ,ia,ia,ia | ||
48,迦促,迦,24,促,ia?,iah,ㄧㄚㆷ,iah,iah,iah | ||
49,檜舒,檜,25,舒,ue,ue,ㄨㆤ,oe,ue,ue | ||
50,檜促,檜,25,促,ue?,ueh,ㄨㆤㆷ,oeh,ueh,ueh | ||
51,監舒,監,26,舒,ã,ann,ㆩ,aⁿ,ann,na | ||
52,監促,監,26,促,ã?,ahnn,ㆩㆷ,aⁿh,annh,nah | ||
53,艍舒,艍,27,舒,u,u,ㄨ,u,u,u | ||
54,艍促,艍,27,促,u?,uh,ㄨㆷ,uh,uh,uh | ||
55,膠舒,膠,28,舒,a,a,ㄚ,a,a,a | ||
56,膠促,膠,28,促,a?,ah,ㄚㆷ,ah,ah,ah | ||
57,居舒,居,29,舒,i,i,ㄧ,i,i,i | ||
58,居促,居,29,促,i?,ih,ㄧㆷ,ih,ih,ih | ||
59,丩舒,丩,30,舒,iu,iu,ㄧㄨ,iu,iu,iu | ||
60,丩促,丩,30,促,,,,,, | ||
61,更舒,更,31,舒,ẽ,enn,ㆥ,eⁿ,enn,ne | ||
62,更促,更,31,促,ẽ?,ehnn,ㆥㆷ,eⁿh,ennh,neh | ||
63,褌舒,褌,32,舒,uĩ,uinn,ㄨㆪ,uiⁿ,uinn,nui | ||
64,褌促,褌,32,促,,,,,, | ||
65,茄舒,茄,33,舒,iə,io,ㄧㄜ,io,io,io | ||
66,茄促,茄,33,促,iə?,ioh,ㄧㄜㆷ,ioh,ioh,ioh | ||
67,梔舒,梔,34,舒,ĩ,inn,ㆪ,iⁿ,inn,ni | ||
68,梔促,梔,34,促,ĩ?,ihnn,ㆪㆷ,iⁿh,innh,nih | ||
69,薑舒,薑,35,舒,ĩɔ̃,ionn,ㄧㆧ,ioⁿ,ionn,nioo | ||
70,薑促,薑,35,促,,,,,, | ||
71,驚舒,驚,36,舒,iã,iann,ㄧㆩ,iaⁿ,iann,nia | ||
72,驚促,驚,36,促,,,,,, | ||
73,官舒,官,37,舒,ũã,uann,ㄨㆩ,oaⁿ,uann,nua | ||
74,官促,官,37,促,,,,,, | ||
75,鋼舒,鋼,38,舒,ŋ̍,ng,ㆭ,ng,ng,ng | ||
76,鋼促,鋼,38,促,,,,,, | ||
77,伽舒,伽,39,舒,e,e,ㆤ,e,e,e | ||
78,伽促,伽,39,促,e?,eh,ㆤㆷ,eh,eh,eh | ||
79,閒舒,閒,40,舒,ãĩ,ainn,ㆮ,aiⁿ,ainn,nai | ||
80,閒促,閒,40,促,,,,,, | ||
81,姑舒,姑,41,舒,ɔ̃ũ,oonn,ㆧ,oⁿ,onn,noo | ||
82,姑促,姑,41,促,,,,,, | ||
83,姆舒,姆,42,舒,m̩,m,ㆬ,m,m,m | ||
84,姆促,姆,42,促,,,,,, | ||
85,光舒,光,43,舒,uaŋ,uang,ㄨㄤ,oang,uang,uang | ||
86,光促,光,43,促,uak̚,uak,ㄨㄚㆻ,oak,uak,uak | ||
87,閂舒,閂,44,舒,uãĩ,uainn,ㄨㆮ,oaiⁿ,uainn,nuai | ||
88,閂促,閂,44,促,uãĩ?,uaihnn,ㄨㆮㆷ,oaiⁿh,uainnh,nuaih | ||
89,糜舒,糜,45,舒,uẽ,uenn,ㄨㆥ,oeⁿ,uenn,nue | ||
90,糜促,糜,45,促,,,,,, | ||
91,嘄舒,嘄,46,舒,ĩãũ,iaunn,ㄧㆯ,iauⁿ,iaunn,niao | ||
92,嘄促,嘄,46,促,ĩãũ?,iauhnn,ㄧㆯㆷ,iauⁿh,iaunnh,niaoh | ||
93,箴舒,箴,47,舒,ɔm,om,ㆱ,om,om,om | ||
94,箴促,箴,47,促,ɔp̚,op,ㆦㆴ,op,op,op | ||
95,爻舒,爻,48,舒,ãũ,aunn,ㆯ,auⁿ,aunn,nao | ||
96,爻促,爻,48,促,,,,,, | ||
97,扛舒,扛,49,舒,õ,onn,ㆧ,oⁿ,onn,noo | ||
98,扛促,扛,49,促,õ?,ohnn,ㆧㆷ,oⁿh,onnh,nooh | ||
99,牛舒,牛,50,舒,iũ,iunn,ㄧㆫ,iuⁿ,iunn,niu | ||
100,牛促,牛,50,促,,,,,, |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# SQL 查詢指引 | ||
|
||
|
||
## 查找某漢字注音 | ||
|
||
``` | ||
SELECT | ||
HJT.[識別號] AS [識別號], | ||
HJT.[聲母] AS [十五音聲母], | ||
HJT.[韻母] AS [十五音韻母], | ||
HJT.[聲調] AS [十五音聲調], | ||
HJT.[常用度] AS [常用度], | ||
SBP.[台語音標] AS [聲母台語音標], | ||
UBP.[台語音標] AS [韻母台語音標], | ||
SBP.[方音符號] AS [聲母方音符號], | ||
UBP.[方音符號] AS [韻母方音符號], | ||
STP.[台羅八聲調] AS [八聲調] | ||
FROM | ||
Han_Ji_Tian HJT | ||
LEFT JOIN | ||
Siann_Bu_Piau SBP ON HJT.[聲母識別號] = SBP.[識別號] | ||
LEFT JOIN | ||
Un_Bu_Piau UBP ON HJT.[韻母識別號] = UBP.[識別號] | ||
LEFT JOIN | ||
Siann_Tiau_Piau STP ON HJT.[聲調識別號] = STP.[識別號] | ||
WHERE | ||
HJT.[漢字] = '不' | ||
ORDER BY | ||
HJT.[常用度] DESC; | ||
``` |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# 雅俗通十五音漢字典查詢模組 | ||
import re | ||
import sqlite3 | ||
|
||
|
||
def connect_to_db_by_context_manager_decorator(db_path):
    """Build a decorator that runs a function against a fresh SQLite cursor.

    The decorated function is invoked as ``func(cursor, *args, **kwargs)``:
    every call opens a new connection to ``db_path``, passes a cursor on that
    connection as the first positional argument, and closes the connection
    when the call finishes, whether it returned normally or raised.

    This wrapper never calls ``commit``; a decorated function that writes
    must commit by itself before returning.

    :param db_path: database path handed to ``sqlite3.connect``
    :return: a decorator; the decorated callable no longer takes the cursor
        argument and returns whatever the wrapped function returns
    """
    import functools  # local import so the module's import block is untouched

    def connect_to_db(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            conn = sqlite3.connect(db_path)
            try:
                return func(conn.cursor(), *args, **kwargs)
            finally:
                # Close even when func raises, so the connection never leaks.
                conn.close()

        return wrapper

    return connect_to_db
|
||
|
||
def connect_to_db(db_path):
    """Open a SQLite database and return the connection together with a cursor.

    :param db_path: database path handed to ``sqlite3.connect``
    :return: ``(conn, cursor)`` tuple; the cursor belongs to ``conn`` and the
        caller is responsible for closing the connection
    """
    connection = sqlite3.connect(db_path)
    return connection, connection.cursor()
|
||
|
||
def connect_to_db2(db_path):
    """Open a SQLite database and return only the connection.

    No cursor is created here; callers obtain one with ``conn.cursor()``
    when they need it.

    :param db_path: database path handed to ``sqlite3.connect``
    :return: the open connection; the caller is responsible for closing it
    """
    return sqlite3.connect(db_path)
|
||
|
||
def close_db_connection(conn):
    """Close the given database connection.

    :param conn: connection object to close (its ``close()`` method is called)
    """
    conn.close()
|
||
|
||
# ==========================================================
# Look up the 《雅俗通十五音》 readings of a Han character
# ==========================================================
def han_ji_cha_piau_im(cursor, han_ji):
    """
    Look up the recorded readings of a Han character.

    Matching rows are returned in descending order of the `常用度` column;
    a NULL in that column is treated as 0 for sorting purposes only (the
    returned value stays NULL/None).

    :param cursor: database cursor used to run the query
    :param han_ji: the Han character to look up
    :return: list of dicts, one per matching row, keyed by the result column
        names; an empty list when the character is not found
    """
    # Keys of each returned dict, in the same order as the SELECT list below.
    column_names = (
        '識別號', '十五音聲母', '十五音韻母', '十五音聲調', '常用度',
        '聲母台語音標', '韻母台語音標', '聲母方音符號', '韻母方音符號', '八聲調',
    )

    sql = """
    SELECT
        HJT.[識別號] AS [識別號],
        HJT.[聲母] AS [十五音聲母],
        HJT.[韻母] AS [十五音韻母],
        HJT.[聲調] AS [十五音聲調],
        HJT.[常用度] AS [常用度],
        SBP.[台語音標] AS [聲母台語音標],
        UBP.[台語音標] AS [韻母台語音標],
        SBP.[方音符號] AS [聲母方音符號],
        UBP.[方音符號] AS [韻母方音符號],
        STP.[台羅八聲調] AS [八聲調]
    FROM
        Han_Ji_Tian HJT
    LEFT JOIN
        Siann_Bu_Piau SBP ON HJT.[聲母識別號] = SBP.[識別號]
    LEFT JOIN
        Un_Bu_Piau UBP ON HJT.[韻母識別號] = UBP.[識別號]
    LEFT JOIN
        Siann_Tiau_Piau STP ON HJT.[聲調識別號] = STP.[識別號]
    WHERE
        HJT.[漢字] = ?
    ORDER BY
        COALESCE(HJT.[常用度], 0) DESC;
    """

    cursor.execute(sql, (han_ji,))

    # Pair each row's values with the column names to build one dict per row.
    readings = []
    for row in cursor.fetchall():
        readings.append(dict(zip(column_names, row)))
    return readings
|
||
|
||
|
||
# ==========================================================
# Split a syllable's phonetic code into initial, final and tone number
# ==========================================================
# Initials recognised at the start of a code. Longer initials are listed
# before their shorter prefixes (``kh`` before ``k``, ``ng`` before ``n``)
# so the alternation picks the longest one. Compiled once at import time.
_SING_BU_PATTERN = re.compile(r"(b|ch|c|g|h|j|kh|k|l|m|ng|n|ph|p|s|th|t|Ø)")


def split_cu_im(cu_im):
    """Split a phonetic code such as ``"put4"`` into its three parts.

    The leading initial is matched against the known initials, the last
    character is taken as the tone mark as-is (it is not validated), and
    everything in between is the final.

    A code that does not start with a known initial (for example the
    vowel-initial ``"un1"``) is treated as having an empty initial instead
    of failing.

    :param cu_im: phonetic code: optional initial + final + one tone character
    :return: list ``[initial, final, tone]`` of strings
    :raises ValueError: if ``cu_im`` is empty
    """
    if not cu_im:
        raise ValueError("cu_im must be a non-empty string")

    matched = _SING_BU_PATTERN.match(cu_im)
    # No recognised initial: fall back to the zero initial ("").
    sing_bu = matched.group() if matched else ""

    un_bu = cu_im[len(sing_bu):-1]
    tiau = cu_im[-1]
    return [sing_bu, un_bu, tiau]
|
||
|
||
if __name__ == "__main__":
    # Smoke test run when the module is executed directly: look up one
    # character in the dictionary database and check its first reading.
    conn = sqlite3.connect('Nga_Siok_Thong_Sip_Ngoo_Im.db')  # replace with the actual database path
    try:
        cursor = conn.cursor()

        # --------------------------------------------------
        # Exercise `han_ji_cha_piau_im`
        # --------------------------------------------------
        han_ji = '不'
        result = han_ji_cha_piau_im(cursor, han_ji)
        print(result)
        assert result[0]['十五音聲母'] == '邊', "轉換錯誤!"
        assert result[0]['十五音韻母'] == '君', "轉換錯誤!"
        assert result[0]['十五音聲調'] == '上入', "轉換錯誤!"
        assert result[0]['八聲調'] == 4, "轉換錯誤!"
        assert result[0]['聲母台語音標'] == 'p', "轉換錯誤!"
        assert result[0]['韻母台語音標'] == 'ut', "轉換錯誤!"
        assert result[0]['聲母方音符號'] == 'ㄅ', "轉換錯誤!"
        assert result[0]['韻母方音符號'] == 'ㄨㆵ', "轉換錯誤!"
    finally:
        # Close the connection even when the query or an assertion fails.
        conn.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import sqlite3 | ||
import unittest | ||
|
||
from mod_雅俗通 import han_ji_cha_piau_im, split_cu_im # 假設你將函數保存在 'your_module.py' 中 | ||
|
||
|
||
class TestHanJiChaPiauIm(unittest.TestCase):
    """Tests for `han_ji_cha_piau_im` and `split_cu_im`."""

    @classmethod
    def setUpClass(cls):
        # Open one connection and cursor shared by every test in the class.
        cls.conn = sqlite3.connect('Nga_Siok_Thong_Sip_Ngoo_Im.db')  # replace with the actual database path
        cls.cursor = cls.conn.cursor()

    @classmethod
    def tearDownClass(cls):
        # Release the shared connection once all tests have run.
        cls.conn.close()

    def test_han_ji_cha_piau_im(self):
        # The first reading returned for '不' must carry these field values.
        result = han_ji_cha_piau_im(self.cursor, '不')
        expected_fields = (
            ('十五音聲母', '邊'),
            ('十五音韻母', '君'),
            ('十五音聲調', '上入'),
            ('八聲調', 4),
            ('聲母台語音標', 'p'),
            ('韻母台語音標', 'ut'),
            ('聲母方音符號', 'ㄅ'),
            ('韻母方音符號', 'ㄨㆵ'),
        )
        for field, value in expected_fields:
            self.assertEqual(result[0][field], value, "轉換錯誤!")

    def test_split_cu_im(self):
        # "put4" must split into initial 'p', final 'ut' and tone '4'.
        result = split_cu_im("put4")
        expected_parts = (
            (0, 'p', "聲母錯誤!"),
            (1, 'ut', "韻母錯誤!"),
            (2, '4', "聲調錯誤!"),
        )
        for index, value, message in expected_parts:
            self.assertEqual(result[index], value, message)
|
||
|
||
# Run the unittest test runner when this file is executed directly.
if __name__ == '__main__':
    unittest.main()