This repository has been archived by the owner on Sep 15, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
unique-chars-extractor.py
executable file
·75 lines (62 loc) · 1.92 KB
/
unique-chars-extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/python
# author: YangLe 201204
import codecs
import sys
import re
def extractValuesFromStringsText(text):
    """Concatenate the values of every ``key = value;`` line of .strings text.

    Block comments and line comments are removed first.  Each remaining
    non-blank line is then split at the first ``=`` that is not inside a
    double-quoted literal.  The right-hand side is stripped of spaces, tabs
    and semicolons, its enclosing double quotes are dropped, and
    backslash-escaped double quotes are unescaped.  Other escape sequences
    are left exactly as written.

    Lines that have no ``=`` outside a quoted literal are reported on stdout
    as ``skip line: ...`` and contribute nothing to the result.

    Args:
        text: Content of a .strings file, already decoded to text.

    Returns:
        All extracted values joined together with no separator.
    """
    # A double-quoted literal confined to a single line, honouring backslash
    # escapes so that an escaped quote does not terminate it.
    quoted = r'"(?:\\.|[^"\\\n])*"'
    commentOrLiteral = re.compile(quoted + r'|/\*[\s\S]*?\*/|//.*')
    # Everything up to the first '=' outside a literal, then the raw value.
    keyValue = re.compile(r'(?:' + quoted + r'|[^="])*=(.*)')

    def removeComments(text):
        # Literals are matched too (and kept) so that comment markers inside
        # a value, such as the "//" of a URL, are not mistaken for comments.
        def dropComment(match):
            token = match.group(0)
            return token if token.startswith('"') else ''
        return commentOrLiteral.sub(dropComment, text)

    def extractValueFromLine(line):
        if not line.strip():
            return ''
        match = keyValue.match(line)
        if match is None:
            print('skip line: %s' % line)
            return ''
        value = match.group(1).strip(' \t;')
        # The length guard keeps an empty or single-quote-character value
        # from being treated as a quoted literal.
        if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        # Unescape \" only; other escapes are intentionally left untouched.
        return value.replace('\\"', '"')

    stripped = removeComments(text)
    return ''.join(extractValueFromLine(line) for line in stripped.splitlines())
def uniqueCharsFromText(text):
    """Return every distinct character of ``text`` exactly once.

    Characters appear in the order of their first occurrence, so the same
    input always produces the same output string.

    Args:
        text: Any string; may be empty.

    Returns:
        A string holding each character of ``text`` once.
    """
    seen = set()
    ordered = []
    for char in text:
        if char not in seen:
            seen.add(char)
            ordered.append(char)
    return ''.join(ordered)
def uniqueCharsFromStringsText(text):
    """Return the distinct characters used by the values in .strings text.

    The values are pulled out with extractValuesFromStringsText and the
    resulting string is reduced to its unique characters with
    uniqueCharsFromText.

    Args:
        text: Content of a .strings file, already decoded to text.

    Returns:
        A string containing each character of the extracted values once.
    """
    values = extractValuesFromStringsText(text)
    return uniqueCharsFromText(values)
def doSthWithFile(inputfile, block, outputfile):
    """Read ``inputfile``, transform its text with ``block``, save the result.

    The input is decoded according to its byte-order mark: UTF-32 or UTF-16
    when the corresponding BOM is present, UTF-8 otherwise (a leading UTF-8
    BOM is discarded).  The transformed text is echoed to stdout and written
    to ``outputfile``, in both cases encoded as UTF-8.

    Args:
        inputfile: Path of the file to read.
        block: Callable taking the decoded text and returning the text to emit.
        outputfile: Path of the file to create or overwrite.

    Raises:
        IOError / OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is invalid in the detected encoding.
    """
    # Binary mode: the bytes are decoded explicitly below, which behaves the
    # same on Python 2 and 3 and avoids platform newline translation.
    with open(inputfile, 'rb') as f:
        raw = f.read()

    # UTF-32 is tested first because its little-endian BOM begins with the
    # UTF-16 little-endian BOM.
    bomEncodings = (
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
    )
    encoding = 'utf-8-sig'  # plain UTF-8, dropping a BOM if there is one
    for bom, name in bomEncodings:
        if raw.startswith(bom):
            encoding = name
            break

    result = block(raw.decode(encoding))
    encoded = result.encode('utf-8')

    # Echo the result as UTF-8 bytes so it does not depend on the locale.
    binaryStdout = getattr(sys.stdout, 'buffer', None)
    if binaryStdout is None:
        # Python 2 (byte-oriented stdout) or a text-only replacement stream.
        print(encoded if isinstance(encoded, str) else result)
    else:
        sys.stdout.flush()  # keep ordering with earlier text-mode output
        binaryStdout.write(encoded + b'\n')
        binaryStdout.flush()

    with open(outputfile, 'wb') as f:
        f.write(encoded)
if __name__ == '__main__':
    # Exactly two arguments are expected: the input path and the output path.
    if len(sys.argv) != 3:
        print('usage: ./unique-chars-extractor.py inputfile outputfile')
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive sessions and is absent when site is not loaded.
        sys.exit(-1)
    inputfile, outputfile = sys.argv[1:]
    suffix = '.strings'
    # For .strings input only the values are scanned; for any other file
    # every character of the file is considered.
    if inputfile.endswith(suffix):
        extractor = uniqueCharsFromStringsText
    else:
        extractor = uniqueCharsFromText
    doSthWithFile(inputfile, extractor, outputfile)