Skip to content

Commit

Permalink
重構專案文檔管理架構;並將查字典找漢字讀音的資料庫改用 SQLite。
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanJui committed Mar 8, 2024
1 parent 9e697db commit 6704fbc
Show file tree
Hide file tree
Showing 30 changed files with 242 additions and 41 deletions.
Binary file modified Kong_Un.db
Binary file not shown.
3 changes: 3 additions & 0 deletions config_dev_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@

# Constants
WAIT_TIME = 5 # seconds

# Database
DATABASE_PATH = '.\\Kong_Un.db'
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# 廣韻切語字典資料模型

```plantuml
@startyaml
#highlight "切語上字"
#highlight "切語上字" / "漢字"
Expand All @@ -22,4 +25,5 @@
韻母: 東一
調: 平
擬音: ong
@endyaml
@endyaml
```
File renamed without changes.
File renamed without changes.
File renamed without changes.
31 changes: 0 additions & 31 deletions docs/diagrams/example/a001.puml

This file was deleted.

26 changes: 26 additions & 0 deletions docs/diagrams/m100_建立標注音檔案.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# m100 建立標注音檔案

```plantuml
@startuml
start
partition 新建活頁簿檔案 {
:建立一個 Excel 活頁簿檔案;
:將預設工作表的 A 欄寬度設為 128;
:繼上,設定 A 欄所有儲存格,皆可自動換行;
}
' 自 output 子目錄、env.xlsx 活頁簿檔案,抄錄 env 工作表;
partition 複製模版工作表 {
:打開模版活頁簿檔案 ./output/env.xlsx;
:指定此活頁簿檔案為來源檔案;
:將來源檔案的工作表複製到新建活頁簿檔案中;
}
partition 存檔處理 {
:將新建之活頁簿檔案,以 "Piau-Tsu-Im.xlsx" 為檔名儲存;
}
stop
@enduml
```
2 changes: 2 additions & 0 deletions docs/diagrams/m300_查字典標注音.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# m300 查字典標注音

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 3 additions & 2 deletions docs/SDD_001.md → libs/diagrams/program_flow_chart.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

```plantuml
@startuml
start
:Let entSet be a set of Entitlements to revoke;
Expand Down Expand Up @@ -36,6 +38,5 @@ entitlements of entSet entitlements;;
Consumers that have an entitlement in entSet;
stop
@enduml

plantuml
```

Binary file modified output/Piau-Tsu-Im.xlsx
Binary file not shown.
Binary file not shown.
Binary file modified output/【河洛話注音】水龍吟·登建康賞心亭.xlsx
Binary file not shown.
13 changes: 6 additions & 7 deletions p100_tsa_ji_tian.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import re

import psycopg2
import sqlite3
import xlwings as xw

# 專案全域常數
from config_dev_env import DATABASE_PATH

def main_run(CONVERT_FILE_NAME):
# ==========================================================
Expand Down Expand Up @@ -59,10 +61,7 @@ def main_run(CONVERT_FILE_NAME):
# =========================================================="
# 資料庫",
# =========================================================="
conn = psycopg2.connect(
# database="alanjui", user="alanjui", host="127.0.0.1", port="5432"
database="Ho_Lok_Ue", user="postgres", password="ChingHai99@", host="127.0.0.1", port="5432"
)
conn = sqlite3.connect(DATABASE_PATH)
db_cursor = conn.cursor()
source_index = 1 # index for source sheet
target_index = 1
Expand Down Expand Up @@ -110,8 +109,8 @@ def main_run(CONVERT_FILE_NAME):
# from han_ji
# where han_ji='{search_han_ji}'
sql = (
"SELECT id, han_ji, chu_im, freq, siann, un, tiau "
"FROM han_ji_dict "
"SELECT id, han_ji, tl_im, freq, siann, un, tiau "
"FROM Sip_Ngoo_Im_Han_Ji_Tian "
f"WHERE han_ji='{beh_tshue_tsu_im_e_ji}' "
"ORDER BY freq DESC;"
)
Expand Down
197 changes: 197 additions & 0 deletions p100_tsa_ji_tian_postgres.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import re

import psycopg2
import xlwings as xw


# Punctuation marks that are skipped instead of being looked up in the
# dictionary.  The second class originally contained ``\\u2018``; inside a raw
# string that is an escaped backslash followed by the literal text "u2018",
# which turned the class into a range from "8" to U+201D and silently treated
# Latin letters and the digits 8-9 as punctuation.  It is now the intended
# range U+2018-U+201D (curly quotation marks).
_PIAU_TIAM_1 = r"[,、:;.。?!()「」【】《》“]"
_PIAU_TIAM_2 = r"[\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F\uFE50-\uFE5E]"  # noqa: E501
# Compiled once at import time rather than on every row of the sheet.
_PIAU_TIAM_PATTERN = re.compile(_PIAU_TIAM_1 + "|" + _PIAU_TIAM_2, re.M | re.I)

# One parameterized lookup; the driver quotes the character, so a cell
# containing a quote (or anything else) cannot alter the statement.
_TSHUE_TSU_IM_SQL = (
    "SELECT id, han_ji, chu_im, freq, siann, un, tiau "
    "FROM han_ji_dict "
    "WHERE han_ji = %s "
    "ORDER BY freq DESC;"
)


def _prepare_blank_sheet(wb, sheet_name):
    """Clear the worksheet *sheet_name* of *wb*, adding it when it is missing."""
    try:
        # The lookup is inside the ``try`` as well, so a failure raised while
        # resolving the sheet is handled the same way as one raised by
        # select()/clear().
        sheet = wb.sheets[sheet_name]
        sheet.select()
        sheet.clear()
    except Exception as e:
        # The original comment attributes this to a CommandError meaning the
        # sheet does not exist.  NOTE(review): the concrete exception type
        # presumably depends on the xlwings backend, hence the broad catch.
        print(e)
        wb.sheets.add(name=sheet_name)


def main_run(CONVERT_FILE_NAME):
    """Look up every character of the "漢字注音表" sheet in the dictionary database.

    For each row of column A the character is searched in the ``han_ji_dict``
    table.  The most frequent reading (rows are ordered by ``freq`` descending)
    is written to columns B-G of the same row: reading, initial, final, tone,
    number of readings found and frequency.

    Two helper sheets are cleared (or created) first and then filled:

    * "缺字表": one row per character that has no entry in the database
      (running number, character, source row).
    * "字庫表": every reading of each character that has more than one
      (source row, record id, character, reading, initial, final, tone,
      frequency).

    Blank cells and punctuation are skipped.

    Args:
        CONVERT_FILE_NAME: Path of the Excel workbook to open with xlwings.
    """
    wb = xw.Book(CONVERT_FILE_NAME)

    # Source sheet and its last used row in column A.
    source_sheet = wb.sheets["漢字注音表"]
    end_of_row_no = (
        source_sheet.range("A" + str(source_sheet.cells.last_cell.row)).end("up").row
    )
    # NOTE(review): the last used row is deliberately excluded here, as in the
    # original code; confirm that the final row is not meant to be processed.
    end_of_row_no = int(end_of_row_no) - 1
    print(f"end_row = {end_of_row_no}")

    # Work sheets used for the report of missing / ambiguous characters.
    for sheet_name in ("缺字表", "字庫表"):
        _prepare_blank_sheet(wb, sheet_name)

    khiam_ji_piau = wb.sheets["缺字表"]
    ji_khoo_piau = wb.sheets["字庫表"]

    han_ji_tsu_im_piau = wb.sheets["漢字注音表"]
    han_ji_tsu_im_piau.select()

    # NOTE(review): the credentials are hard-coded in the source; they should
    # be moved to configuration or the environment and the password rotated.
    conn = psycopg2.connect(
        database="Ho_Lok_Ue",
        user="postgres",
        password="ChingHai99@",
        host="127.0.0.1",
        port="5432",
    )
    try:
        db_cursor = conn.cursor()
        ji_khoo_index = 1
        khiam_ji_index = 1

        for source_index in range(1, end_of_row_no + 1):
            # Source and target rows always advance together, so results are
            # written to the row the character was read from.
            target_index = source_index
            print(f"row = {source_index}")

            # An empty cell is read as None; treat it as blank instead of
            # turning it into the text "None".
            cell_value = source_sheet.range("A" + str(source_index)).value
            beh_tshue_tsu_im_e_ji = (
                "" if cell_value is None else str(cell_value).strip()
            )

            # Blank cells (strip() also removes line breaks) and punctuation
            # have no reading to look up.
            if not beh_tshue_tsu_im_e_ji:
                continue
            if _PIAU_TIAM_PATTERN.search(beh_tshue_tsu_im_e_ji):
                continue

            db_cursor.execute(_TSHUE_TSU_IM_SQL, (beh_tshue_tsu_im_e_ji,))
            ji_e_piau_im = db_cursor.fetchall()

            # No reading found: record the character in the missing sheet.
            if not ji_e_piau_im:
                print(f"Can not find 【{beh_tshue_tsu_im_e_ji}】in Han-Ji-Khoo!!")
                khiam_ji_row = str(khiam_ji_index)
                khiam_ji_piau.range("A" + khiam_ji_row).value = khiam_ji_index
                khiam_ji_piau.range("B" + khiam_ji_row).value = beh_tshue_tsu_im_e_ji
                khiam_ji_piau.range("C" + khiam_ji_row).value = source_index
                khiam_ji_index += 1
                continue

            # The first row is the most frequent reading; columns follow the
            # SELECT list (id, han_ji, chu_im, freq, siann, un, tiau).
            piau_im_tsong_soo = len(ji_e_piau_im)
            _, _, tsu_im, freq, siann_bu, un_bu, tiau_ho = ji_e_piau_im[0]

            target_row = str(target_index)
            han_ji_tsu_im_piau.range("B" + target_row).value = tsu_im
            han_ji_tsu_im_piau.range("C" + target_row).value = siann_bu
            han_ji_tsu_im_piau.range("D" + target_row).value = un_bu
            han_ji_tsu_im_piau.range("E" + target_row).value = tiau_ho
            han_ji_tsu_im_piau.range("F" + target_row).value = piau_im_tsong_soo
            han_ji_tsu_im_piau.range("G" + target_row).value = freq

            # More than one reading: list all of them in the dictionary sheet
            # so the right one can be chosen by hand.
            if piau_im_tsong_soo > 1:
                for han_ji_id, _, tsu_im, freq, siann_bu, un_bu, tiau_ho in ji_e_piau_im:
                    ji_khoo_row = str(ji_khoo_index)
                    ji_khoo_piau.range("A" + ji_khoo_row).value = source_index
                    ji_khoo_piau.range("B" + ji_khoo_row).value = han_ji_id
                    ji_khoo_piau.range("C" + ji_khoo_row).value = beh_tshue_tsu_im_e_ji
                    ji_khoo_piau.range("D" + ji_khoo_row).value = tsu_im
                    ji_khoo_piau.range("E" + ji_khoo_row).value = siann_bu
                    ji_khoo_piau.range("F" + ji_khoo_row).value = un_bu
                    ji_khoo_piau.range("G" + ji_khoo_row).value = tiau_ho
                    ji_khoo_piau.range("H" + ji_khoo_row).value = freq
                    ji_khoo_index += 1
    finally:
        # Always release the connection, even when a lookup or a sheet write
        # fails part-way through.
        conn.close()

0 comments on commit 6704fbc

Please sign in to comment.