Skip to content

Commit

Permalink
Added Phonetic System Batch Converter
Browse files Browse the repository at this point in the history
  • Loading branch information
Cadlaxa committed Jul 29, 2024
1 parent 2f16037 commit dd64510
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 14 deletions.
128 changes: 114 additions & 14 deletions OU Dictionary Editor/OpenUtau_Dictionary_Editor.pyw
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import configparser
from Assets.modules import requests
import zipfile
from zipfile import ZipFile
import shutil, threading, subprocess, copy, platform, gzip, pyglet, pyperclip, io
import shutil, threading, subprocess, copy, platform, gzip, pyglet, pyperclip, io, csv
import ctypes as ct
import json, pickle, darkdetect, webbrowser, markdown2, glob, chardet
from tkhtmlview import HTMLLabel
Expand All @@ -33,6 +33,7 @@ ASSETS = P('./Assets')
ICON = P('./Assets/icon.png')
ICON1 = P('./Assets/icon.ico')
CACHE = P('./Cache')
PHONEME_SYSTEMS = TEMPLATES / P('phoneme systems.csv')
# soon
AUTOSAVES = P('./Autosaves and Backups')

Expand Down Expand Up @@ -142,6 +143,8 @@ class Dictionary(TkinterDnD.Tk):
self.redo_stack = []
self.copy_stack = []
self.plugin_file = None
self.phoneme_map = {}
self.systems = []

self.template_var = tk.StringVar(value="Custom Template")
self.entries_window = None
Expand Down Expand Up @@ -1635,36 +1638,69 @@ class Dictionary(TkinterDnD.Tk):
def regex_replace_dialog(self):
if self.replace_window is None or not self.replace_window.winfo_exists():
self.replace_window = tk.Toplevel(self)
self.replace_window.resizable(False, False)
self.replace_window.title("Regex Replace")
self.save_state_before_change()
self.load_csv()

reg_frame = ttk.Frame(self.replace_window, style='Card.TFrame')
reg_frame.pack(padx=10, pady=10, fill="x")
card_frame = ttk.Frame(self.replace_window, style='Card.TFrame')
card_frame.pack(padx=10, pady=10, fill="both", expand=True)

reg_frame = ttk.Frame(card_frame, style='Card.TFrame')
reg_frame.grid(padx=10, pady=10, sticky='nsew', row=1)
reg_frame.grid_columnconfigure(0, weight=1)
reg_frame.grid_columnconfigure(1, weight=1)

reg_frame1 = ttk.Frame(card_frame)
reg_frame1.grid(padx=10, pady=10, sticky='nsew', row=0)
reg_frame1.grid_columnconfigure(0, weight=1)
reg_frame1.grid_columnconfigure(1, weight=20)

# Fields for entering regex pattern and replacement text
reg_pat = ttk.Label(reg_frame, text="Regex Pattern:", font=self.font)
reg_pat = ttk.Label(reg_frame1, text="Regex Pattern:", font=self.font)
reg_pat.grid(row=0, column=0, padx=10, pady=20)
self.localizable_widgets['reg_pattern'] = reg_pat
regex_var = tk.StringVar()
regex_entry = ttk.Entry(reg_frame, textvariable=regex_var, width=30)
regex_entry.grid(row=0, column=1, padx=10, pady=5, sticky="ew")
regex_entry = ttk.Entry(reg_frame1, textvariable=regex_var, width=30)
regex_entry.grid(row=0, column=1, padx=15, pady=5, sticky="ew")

reg_rep = ttk.Label(reg_frame, text="Replacement:", font=self.font)
reg_rep = ttk.Label(reg_frame1, text="Replacement:", font=self.font)
reg_rep.grid(row=1, column=0, padx=10, pady=5)
self.localizable_widgets['replacement'] = reg_rep
replace_var = tk.StringVar()
replace_entry = ttk.Entry(reg_frame, textvariable=replace_var, width=30)
replace_entry.grid(row=1, column=1, padx=10, pady=5, sticky="ew")
replace_entry = ttk.Entry(reg_frame1, textvariable=replace_var, width=30)
replace_entry.grid(row=1, column=1, padx=15, pady=5, sticky="ew")

# Radio buttons to select target (graphemes or phonemes)
target_var = tk.StringVar(value="Phonemes")
ttk.Radiobutton(reg_frame, text="Graphemes", style="TRadiobutton", variable=target_var, value="Graphemes").grid(row=2, column=0, padx=10, pady=(20, 5), sticky="w")
ttk.Radiobutton(reg_frame, text="Phonemes", style="TRadiobutton", variable=target_var, value="Phonemes").grid(row=2, column=1, padx=10, pady=(20, 5), sticky="w")
ttk.Radiobutton(reg_frame, text="Graphemes", style="TRadiobutton", variable=target_var, value="Graphemes").grid(row=2, column=0, padx=(50,10), pady=(20, 5), sticky="w")
ttk.Radiobutton(reg_frame, text="Phonemes", style="TRadiobutton", variable=target_var, value="Phonemes").grid(row=2, column=1, padx=(50,10), pady=(20, 5), sticky="w")

# Combobox for `From Selected Phonetic System`
phone_frame_from = ttk.Frame(reg_frame)
phone_frame_from.grid(padx=(15,0), pady=(10,0), sticky="nsew", row=3, column=0)
phone_frame_from.grid_columnconfigure(0, weight=30)
phone_frame_from.grid_columnconfigure(1, weight=0)

self.combo_from = ttk.Combobox(phone_frame_from, values=self.systems, state="readonly")
self.combo_from.grid(row=0, column=0, sticky='nsew')
self.combo_from.set("Phonetic System")

to_tove_lo = ttk.Button(phone_frame_from, style='Accent.TButton', text="▶", command=self.system_phonemes)
to_tove_lo.grid(row=0, column=1, padx=10)

# Combobox for `To Selected Phonetic System`
phone_frame_to = ttk.Frame(reg_frame)
phone_frame_to.grid(padx=(0,15), pady=(10,0), sticky='nsew', row=3, column=1)
phone_frame_to.grid_columnconfigure(0, weight=1)
phone_frame_to.grid_columnconfigure(1, weight=0)

self.combo_to = ttk.Combobox(phone_frame_to, values=self.systems, state="readonly")
self.combo_to.grid(row=0, column=0, sticky='nsew')
self.combo_to.set("Phonetic System")

rep_frame = ttk.Frame(reg_frame)
rep_frame.grid(padx=10, pady=10, sticky="nsew", row=3, column=1)
rep_frame.grid(padx=(10,15), pady=5, sticky="nsew", row=4, column=1)
rep_frame.grid_columnconfigure(0, weight=1)
rep_frame.grid_columnconfigure(1, weight=3)

Expand All @@ -1678,7 +1714,7 @@ class Dictionary(TkinterDnD.Tk):
self.localizable_widgets['apply1'] = apply_button1

find_frame = ttk.Frame(reg_frame)
find_frame.grid(padx=(10,0), pady=10, sticky="nsew", row=3, column=0)
find_frame.grid(padx=(10,0), pady=5, sticky="nsew", row=4, column=0)
find_frame.grid_columnconfigure(0, weight=0)
find_frame.grid_columnconfigure(1, weight=0)
find_frame.grid_columnconfigure(2, weight=5)
Expand Down Expand Up @@ -1765,9 +1801,73 @@ class Dictionary(TkinterDnD.Tk):
self.phoneme_entry.delete(0, tk.END)
if self.search_var.get():
self.filter_treeview()
self.replace_window.destroy()
self.icon(self.replace_window)
self.apply_localization()

def system_phonemes(self):
    """Convert every phoneme list in ``self.dictionary`` between two systems.

    The source and target systems are read from the ``combo_from`` and
    ``combo_to`` comboboxes. ``self.phoneme_map[system]`` maps a phoneme of
    that system (one or more space-separated tokens) to a shared reference
    phoneme, so a conversion goes
    source phoneme -> reference phoneme -> target phoneme.
    Tokens without a mapping are kept unchanged.

    If either system is missing or unknown, an info dialog is shown and the
    method returns without touching the dictionary or the undo history.
    """
    system_from = self.combo_from.get()
    system_to = self.combo_to.get()

    # Ensure systems are selected
    if not system_from or not system_to:
        messagebox.showinfo("Error", "Please select both 'From' and 'To' phonetic systems.")
        return

    # Ensure the selected systems are in the phoneme map (this also rejects
    # the combobox placeholder text, which is not a real system name)
    if system_from not in self.phoneme_map or system_to not in self.phoneme_map:
        messagebox.showinfo("Error", "Selected systems are not available.")
        return

    # Take the undo snapshot only once the conversion is certain to run, so
    # an aborted conversion does not leave a no-op entry in the undo history.
    self.save_state_before_change()

    phoneme_map_from = self.phoneme_map[system_from]
    phoneme_map_to = self.phoneme_map[system_to]

    # Reference phoneme -> target phoneme. When several target phonemes
    # share one reference phoneme, the last one in the map wins.
    inverse_phoneme_map_to = {v: k for k, v in phoneme_map_to.items()}

    # No window longer than the longest mapped key (counted in tokens) can
    # match, so the longest-match scan never needs to look further ahead.
    longest_key = max((key.count(' ') + 1 for key in phoneme_map_from), default=1)

    def replace_phonemes(phoneme_sequence):
        """Return a converted copy of *phoneme_sequence* (greedy longest match)."""
        replaced_sequence = []
        total = len(phoneme_sequence)
        i = 0
        while i < total:
            # Try the longest candidate window first, then shrink it
            for j in range(min(total, i + longest_key), i, -1):
                substring = ' '.join(phoneme_sequence[i:j])
                if substring in phoneme_map_from:
                    source_phoneme = phoneme_map_from[substring]
                    # Fall back to the original text when the target system
                    # has no phoneme for this reference phoneme
                    replacement = inverse_phoneme_map_to.get(source_phoneme, substring)
                    # Multi-phoneme replacements are space-separated; store
                    # each phoneme as its own list item
                    replaced_sequence.extend(replacement.split(' '))
                    i = j
                    break
            else:
                # Nothing starting at this token is mapped: keep it as is
                replaced_sequence.append(phoneme_sequence[i])
                i += 1
        return replaced_sequence

    # Iterate through the dictionary and update phonemes. Only values of
    # existing keys are reassigned, which is safe while iterating.
    for key, value in self.dictionary.items():
        if isinstance(value, list):
            self.dictionary[key] = replace_phonemes(value)
        else:
            print(f"Unexpected value type for key {key}: {type(value)}")
    self.refresh_treeview()

def load_csv(self):
    """Load the phonetic-system table from ``PHONEME_SYSTEMS``.

    The first CSV row names the systems, one per column, and is stored in
    ``self.systems``. Each following row describes one phoneme: for every
    system, the non-empty cell in that system's column is mapped to the
    row's first cell, i.e. ``self.phoneme_map[system][cell] = row[0]``.

    The map is rebuilt from scratch on every call, so entries removed from
    the CSV do not linger. Blank lines and rows shorter than the header are
    accepted (missing cells are treated as empty), and an empty file yields
    no systems instead of raising.
    """
    # utf-8-sig reads plain UTF-8 too, but drops a leading byte-order mark
    # so it does not become part of the first system name.
    with open(PHONEME_SYSTEMS, newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.reader(csvfile)
        systems = next(reader, [])
        phoneme_map = {system: {} for system in systems}
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines
                continue
            reference = row[0]
            # zip stops at the shorter of header/row, so short rows are safe
            # and surplus cells beyond the header are ignored
            for system, phoneme in zip(systems, row):
                if phoneme:
                    phoneme_map[system][phoneme] = reference  # Map phoneme to its replacement
    self.systems = systems
    self.phoneme_map = phoneme_map

def find_matches(self, pattern, target):
items_to_highlight = []
Expand Down
1 change: 1 addition & 0 deletions OU Dictionary Editor/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Fix paste function for quoted graphemes
- Added Regenerate YAML template from reclist function
- Separate `Plugins` tab
- Added Phonetic System replace (Users can add other phonetic systems by editing the `phoneme systems.csv` in the `Templates` folder)

**`(7/24/24)`**
- Regex find and replace now directly iterates and edits the self.dictionary (the data that holds the graphemes and phonemes) instead of the treeview
Expand Down
82 changes: 82 additions & 0 deletions OU Dictionary Editor/Templates/phoneme systems.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
Arpabet (Vanilla),Arpabet (Extended),CZ-Sampa,Vocaloid-Sampa,X-sampa,IPA,Romaji
aa,aa,a,Q,A,ɑ,a
aa r,ar,ar,Q@,Ar,ɒɹ,a r
ae,ae,@,{,{,æ,a
ae n,ea n,&n,{ n,e@n,eən,e N
ae ng,ea ng,Ang,{ N,E~N,ɛ̃ŋ,e ng
ah,ah,u,V,V,ʌ,a
ao,ao,9,O:,Q,ɒ,o
ao,ox,0,O:,o,o,o
ao r,or,0r,O@,Or,ɔɹ,o r
ax,ax,x,@,@,ə,a
eh,eh,e,e,E,ɛ,e
eh r,air,Ar,e@,er,eɹ,e r
er,er,3,@r,@`,ə˞,a w
ih,ih,i,I,I,ɪ,i
ih ng,ing,1ng,I N,I~N,ɪ̃ŋ,i ng
ih r,ir,Er,I@,ir,iɹ,i r
iy,iy,E,i:,i,i,i
uh,uh,6,U,U,ʊ,u
uw,uw,o,u:,u,u,u
uw r,ur,or,U@,Ur,ʊɹ,u r
aw,aw,8,aU,aU,aʊ,a u
aw n,aun,8n,aU n,aU~n,aʊ̃n,a u n
ay,ay,I,aI,aI,aɪ,a i
ey,ey,A,eI,eI,eɪ,e i
ow,ow,O,@U,oU,oʊ,o w
oy,oy,Q,OI,OI,ɔɪ,o y
b,b,b,bh,b,b,b
ch,ch,ch,tS,tS,tʃ,ch
d,d,d,dh,d,d,d
dh,dh,dh,D,D,ð,d
dx,dx,dd,4,4,ɾ,r
ax l,el,6l,@l,5=,ɫ̩,u l
f,f,f,f,f,f,f
g,g,g,gh,g,g,g
hh,hh,h,h,h,h,h
hv,hv,hh,C,C,ç,hy
jh,jh,j,dZ,dZ,dʒ,j
k,k,k,kh,k,k,k
l,l,l,l0,l,l,l
l,l,l,l,5,ɫ,l
m,m,m,m,m,m,m
n,n,n,n,n,n,n
ng,ng,ng,N,N,ŋ,ng
p,p,p,ph,p,p,p
q,q,?,?,?,ʔ,?
r,r,r,r,r,ɹ,r
s,s,s,s,s,s,s
sh,sh,sh,S,S,ʃ,sh
t,t,t,th,t,t,t
th,th,th,T,T,θ,t
v,v,v,v,v,v,v
w,w,w,w,w,w,w
y,y,y,j,j,j,y
z,z,z,z,z,z,z
zh,zh,zh,Z,Z,ʒ,j
b y,b y,b y,bh j,b j,b j,by
b w,b w,b w,bh w,b w,b w,bw
d y,d y,d y,dh j,d j,d j,dy
d w,d w,d w,dh w,d w,d w,dw
f y,f y,f y,f j,f j,f j,fy
f w,f w,f w,f w,f w,f w,fw
g y,g y,g y,gh j,g j,g j,gy
g w,g w,g w,gh w,g w,g w,gw
hh y,hh y,hh y,C j,hh j,ç j,hy
hh w,hh w,h w,hh w,h w,h w,hw
k y,k y,k y,kh j,k j,k j,ky
k w,k w,k w,kh w,k w,k w,kw
l y,l y,l y,l j,l j,l j,ly
m y,m y,m y,m j,m j,m j,my
m w,m w,m w,m w,m w,m w,mw
n y,n y,n y,n j,n j,n j,ny
ng y,ng y,ng y,N j,N j,ŋ j,ngy
p y,p y,p y,p j,p j,p j,py
dx y,dx y,dd y,4 j,4 j,ɾ j,ry
s y,s y,s y,s j,s j,s j,sy
t y,t y,t y,t j,t j,t j,ty
t w,t w,t w,t w,t w,t w,tw
v y,v y,v y,v j,v j,v j,vy
z y,z y,z y,z j,z j,z j,zy
t s,t s,t s,t s,t s,t s,ts
hh iy,hv iy,hh E,C i:,C i,ç i,h i

0 comments on commit dd64510

Please sign in to comment.