-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhandle.py
executable file
·237 lines (192 loc) · 6.64 KB
/
handle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#!/usr/bin/env python3
import argparse
import os
import sys
# Command-line interface. Arguments are parsed at import time and exposed through the
# module-level ``args`` namespace, which main() reads.
parser = argparse.ArgumentParser(
    description="Convert TRA files from Windows-specific encoding to UTF-8 and back",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("--tra-path", dest="tra_path", help="source tra directory path", required=True)
parser.add_argument("--out-path", dest="out_path", help="directory path for converted files", required=True)
parser.add_argument(
    "--from-utf8",
    dest="from_utf8",
    # With this flag sources are read as UTF-8 and written in the Windows (ANSI) encoding.
    help="reverse conversion: from UTF-8 to ANSI",
    action="store_true",
    default=False,
)
parser.add_argument(
    "--split-console",
    dest="split_console",
    # Adjacent literals are concatenated verbatim, so the separating space must be explicit.
    help="Generate separate console message files from setup.tra and install.tra. "
    "This will create setup-win32.tra with ANSI encoding, plus setup-unix.tra and setup-osx.tra in UTF-8.",
    action="store_true",
    default=False,
)
args = parser.parse_args()
# Console message files (matched by lowercased basename).
# They are written in ANSI encoding when split_console is set, and in UTF-8 otherwise.
CONSOLE_FILES = [
    "setup.tra",
    "install.tra",
]
# Maps a language directory name (either a lowercase language name or a locale code)
# to the Windows code page used for that language's tra files.
CHARSET_MAP = {
    "schinese": "cp936",
    "zh_CN": "cp936",
    "tchinese": "cp950",
    "czech": "cp1250",
    "cs_CZ": "cp1250",
    "english": "cp1252",
    "american": "cp1252",
    "en_US": "cp1252",
    "french": "cp1252",
    "francais": "cp1252",
    "fr_FR": "cp1252",
    "german": "cp1252",
    "deutsch": "cp1252",
    "de_DE": "cp1252",
    "italian": "cp1252",
    "italiano": "cp1252",
    "it_IT": "cp1252",
    "japanese": "cp932",
    "ja_JP": "cp932",
    # Korean uses cp949; cp932 is the Japanese code page and cannot encode Hangul.
    "korean": "cp949",
    "ko_KR": "cp949",
    "polish": "cp1250",
    "polski": "cp1250",
    "pl_PL": "cp1250",
    "portuguese": "cp1252",
    "pt_BR": "cp1252",
    "russian": "cp1251",
    "ru_RU": "cp1251",
    "spanish": "cp1252",
    "castilian": "cp1252",
    "espanol": "cp1252",
    "castellano": "cp1252",
    "es_ES": "cp1252",
    "swedish": "cp1252",
    # "sv" is the ISO 639-1 code for Swedish; "sw_SE" is kept for backward compatibility.
    "sv_SE": "cp1252",
    "sw_SE": "cp1252",
    "ukrainian": "cp1251",
    "uk_UA": "cp1251",
}
# Header prepended to every generated file, warning that manual edits will be overwritten.
COMMENT_NO_MANUAL = (
    "// Do not edit. "
    "This file is generated automatically by handle-charsets. "
    "Your changes will be lost."
    "\n\n"
)
def resave_file(src_path, src_enc, dst_path, dst_enc):
    """
    Read source file with source encoding and save it to destination with another encoding.

    @src_path: file to read
    @src_enc: encoding used to decode the source file
    @dst_path: file to write; missing parent directories are created
    @dst_enc: encoding used to encode the destination file

    The "do not edit" header is placed at the top of the written file. It is not
    added a second time when the source already starts with it, so repeated
    conversions do not stack headers.
    """
    print(f"{src_path}, {src_enc}\t=>\t{dst_path}, {dst_enc}")
    with open(src_path, encoding=src_enc) as ifile:
        data = ifile.read()

    # Prepare the destination directory. dirname is empty when dst_path is a bare
    # file name; os.makedirs("") would fail, and there is nothing to create anyway.
    dst_dir = os.path.dirname(dst_path)
    if dst_dir:
        # A regular file sitting where the directory should be is replaced.
        if os.path.exists(dst_dir) and not os.path.isdir(dst_dir):
            os.remove(dst_dir)
        os.makedirs(dst_dir, exist_ok=True)

    if not data.startswith(COMMENT_NO_MANUAL):
        data = COMMENT_NO_MANUAL + data
    with open(dst_path, mode="w", encoding=dst_enc) as ofile:
        ofile.write(data)
def find_files(path, ext):
    """
    @path: directory path
    @ext: file extension, with or without the leading dot ("tra" or ".tra")
    Returns a sorted list of paths of the files under @path (searched recursively,
    following symlinks) whose name ends with this extension. Matching is
    case-insensitive. An empty @ext matches every file.
    """
    suffix = ext.lower()
    # Require the dot: a bare endswith("tra") would also match names such as "extra".
    if suffix and not suffix.startswith("."):
        suffix = "." + suffix

    flist = []
    for root, _dirs, files in os.walk(path, followlinks=True):
        flist.extend(os.path.join(root, fname) for fname in files if fname.lower().endswith(suffix))
    return sorted(flist)
def get_win_encoding(language, file_path):
    """
    Determines windows-specific encoding for the file.

    @language: language name or locale code (matched case-insensitively)
    @file_path: path of the tra file; only its lowercased basename is examined
    Returns the encoding name. If the language is unknown, prints a message to
    stderr and terminates the process with exit status 1.
    """
    # Compare case-insensitively so that e.g. "Russian", "ru_RU" and "ru_ru" all match.
    lang_key = language.lower()
    filename = get_filename(file_path)

    # Russian and Ukrainian console messages use cp866 instead of the regular
    # code page (presumably because it is the console code page -- confirm).
    if filename in CONSOLE_FILES and lang_key in ("russian", "ukrainian", "ru_ru", "uk_ua"):
        return "cp866"

    # ee.tra and *_ee.tra are always UTF-8 (presumably Enhanced Edition files -- confirm).
    if filename == "ee.tra" or filename.endswith("_ee.tra"):
        return "utf-8"

    charsets = {name.lower(): encoding for name, encoding in CHARSET_MAP.items()}
    encoding = charsets.get(lang_key)
    if encoding is not None:
        return encoding

    # Diagnostics belong on stderr, keeping stdout for the conversion log.
    print(f"Failed to infer encoding for file {file_path} in language {language}", file=sys.stderr)
    sys.exit(1)
def get_dst_encoding(language, file_path, from_utf8, split_console):
    """
    Pick the encoding the converted file is written in.

    Console message files are written in the Windows encoding only when
    split_console is set; every other file only when from_utf8 is set.
    In all remaining cases the result is "utf-8".
    """
    is_console = get_filename(file_path) in CONSOLE_FILES
    # The flag that decides between ANSI and UTF-8 depends on the kind of file.
    wants_ansi = split_console if is_console else from_utf8
    if wants_ansi:
        return get_win_encoding(language, file_path)
    return "utf-8"
def get_src_encoding(language: str, file_path: str, from_utf8: bool) -> str:
    """
    Pick the encoding the source file is read in.

    Console message files and, when from_utf8 is set, all files are read as
    "utf-8"; anything else is read in the Windows encoding of its language.
    """
    # Source console messages are assumed to be in UTF-8 for new WeiDU.
    is_console = get_filename(file_path) in CONSOLE_FILES
    if is_console or from_utf8:
        return "utf-8"
    return get_win_encoding(language, file_path)
def get_language(dirpath: str) -> str:
    """
    Return the first component of a tra directory path, which names the language.
    """
    first_component, _sep, _rest = dirpath.partition(os.path.sep)
    return first_component
def get_relpath(tra_file, tra_path):
    """
    Express tra_file's path relative to the tra_path directory.
    """
    return os.path.relpath(tra_file, tra_path)
def get_dirpath(tra_relpath):
    """
    Return the directory part of a tra file path (everything before the file name).
    """
    head, _tail = os.path.split(tra_relpath)
    return head
def get_filename(filepath):
    """
    Return the file name part of the path, converted to lowercase.
    """
    _head, tail = os.path.split(filepath)
    return tail.lower()
def get_os_path(relpath, weidu_os):
    """
    Build the OS-specific variant of a console tra file path by inserting
    "-<weidu_os>" before the extension:
    tra/setup.tra -> tra/setup-win32.tra
    The file name part of the result is lowercased.
    """
    stem, suffix = os.path.splitext(get_filename(relpath))
    return os.path.join(get_dirpath(relpath), f"{stem}-{weidu_os}{suffix}")
def main():
    """
    Convert every tra file found under args.tra_path into args.out_path,
    keeping the relative directory layout.
    """
    for src_file in find_files(args.tra_path, "tra"):
        rel = get_relpath(src_file, args.tra_path)
        # The first directory below tra_path names the language.
        lang = get_language(get_dirpath(rel))
        read_enc = get_src_encoding(lang, src_file, args.from_utf8)
        write_enc = get_dst_encoding(lang, src_file, args.from_utf8, args.split_console)
        out_file = os.path.join(args.out_path, rel)

        if not (args.split_console and get_filename(rel) in CONSOLE_FILES):
            resave_file(src_file, read_enc, out_file, write_enc)
            continue

        # Console files are split per OS: win32 gets the computed destination
        # encoding, unix and osx are always written as UTF-8.
        per_os_encoding = (("win32", write_enc), ("unix", "utf-8"), ("osx", "utf-8"))
        for weidu_os, out_enc in per_os_encoding:
            resave_file(src_file, read_enc, get_os_path(out_file, weidu_os), out_enc)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()