-
Notifications
You must be signed in to change notification settings - Fork 0
/
encoding.py
48 lines (42 loc) · 1.08 KB
/
encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# -*- coding: utf-8 -*-
"""
Provides a translation table and a ``normalize`` helper that replace Turkish
(and circumflexed) characters with their ASCII equivalents.
"""
from __future__ import print_function
from __future__ import unicode_literals
# Mapping for ``unicode.translate`` / ``str.translate``: code point of a
# Turkish or circumflexed letter -> its ASCII replacement.  Most uppercase
# letters are folded straight to lowercase; "Ç" is the one entry that keeps
# its case.
translate_table = {
    ord(source): replacement
    for source, replacement in (
        ("â", "a"),
        ("Â", "a"),
        ("ç", "c"),
        ("Ç", "C"),
        ("ğ", "g"),
        ("Ğ", "g"),
        ("ı", "i"),  # dotless lowercase i
        ("I", "i"),  # plain ASCII capital I
        ("î", "i"),
        ("İ", "i"),  # dotted capital I
        ("Î", "i"),  # capital I with circumflex
        ("ş", "s"),
        ("Ş", "s"),
        ("ö", "o"),
        ("Ö", "o"),
        ("û", "u"),
        ("Û", "u"),
        ("ü", "u"),
        ("Ü", "u"),
    )
}
def normalize(s):
    """
    Return a lowercased, ASCII-folded copy of the unicode string ``s``.

    Intended for loose matching: normalize both the stored text and the
    search term, then compare. For example a user can search for "Oğlak"
    and we can find "oğlak", because both normalize to "oglak".

    Characters listed in ``translate_table`` are replaced by their ASCII
    equivalents (the text is anglicized) and everything is lowercased.

        u"Oğlak"     => oglak
        u"başucu"    => basucu
        u"Noel Baba" => noel baba
        u"İstanbul"  => istanbul
    """
    # Fold through the table *before* lowercasing.  On Python 3, str.lower()
    # applies Unicode full case mapping, which turns "İ" (U+0130) into "i"
    # followed by U+0307 COMBINING DOT ABOVE; lowercasing first would make
    # the table's "İ" entry unreachable and leave the combining mark in the
    # result.  The table already covers every uppercase letter whose
    # lowercase form it maps, so the order swap changes nothing else.
    folded = s.translate(translate_table)
    return folded.lower()