1
2
3
4
5 """
6 Simple transliteration
7 """
8
9 __id__ = __revision__ = "$Id: translit.py 32 2006-10-28 07:10:43Z the.pythy $"
10 __url__ = "$URL: https://pythy.googlecode.com/svn/trunk/pytils/pytils/translit.py $"
11
12 import re
13 from pytils import utils
14
# Ordered (cyrillic, latin) replacement pairs.
#
# The pairs are applied one after another with ``replace``, so the order
# is significant: a multi-letter latin spelling has to come before the
# shorter spellings it contains ("Sch" before "Sh" before "S"), otherwise
# the latin -> cyrillic direction would tear it apart.
TRANSTABLE = (
    # typographic punctuation -> ASCII look-alikes
    (u"‘", u"'"),
    (u"’", u"'"),
    (u"«", u'"'),
    (u"»", u'"'),
    (u"–", u"-"),
    (u"…", u"..."),
    (u"№", u"#"),

    # --- upper case ---
    # three-letter spellings.
    # A letter listed twice: cyrillic -> latin always yields the first
    # spelling (the letter is already gone when the second pair is tried),
    # while latin -> cyrillic accepts both spellings.
    (u"Щ", u"Sch"),
    (u"Щ", u"SCH"),
    # two-letter spellings
    (u"Ё", u"Yo"),
    (u"Ё", u"YO"),
    (u"Ж", u"Zh"),
    (u"Ж", u"ZH"),
    (u"Ц", u"Ts"),
    (u"Ц", u"TS"),
    (u"Ч", u"Ch"),
    (u"Ч", u"CH"),
    (u"Ш", u"Sh"),
    (u"Ш", u"SH"),
    (u"Ы", u"Yi"),
    (u"Ы", u"YI"),
    (u"Ю", u"Yu"),
    (u"Ю", u"YU"),
    (u"Я", u"Ya"),
    (u"Я", u"YA"),
    # single-letter spellings
    (u"А", u"A"),
    (u"Б", u"B"),
    (u"В", u"V"),
    (u"Г", u"G"),
    (u"Д", u"D"),
    (u"Е", u"E"),
    (u"З", u"Z"),
    (u"И", u"I"),
    (u"Й", u"J"),
    (u"К", u"K"),
    (u"Л", u"L"),
    (u"М", u"M"),
    (u"Н", u"N"),
    (u"О", u"O"),
    (u"П", u"P"),
    (u"Р", u"R"),
    (u"С", u"S"),
    (u"Т", u"T"),
    (u"У", u"U"),
    (u"Ф", u"F"),
    (u"Х", u"H"),
    (u"Э", u"E"),
    (u"Ъ", u"`"),
    (u"Ь", u"'"),

    # --- lower case ---
    # three-letter spellings
    (u"щ", u"sch"),
    # two-letter spellings
    (u"ё", u"yo"),
    (u"ж", u"zh"),
    (u"ц", u"ts"),
    (u"ч", u"ch"),
    (u"ш", u"sh"),
    (u"ы", u"yi"),
    (u"ю", u"yu"),
    (u"я", u"ya"),
    # single-letter spellings
    (u"а", u"a"),
    (u"б", u"b"),
    (u"в", u"v"),
    (u"г", u"g"),
    (u"д", u"d"),
    (u"е", u"e"),
    (u"з", u"z"),
    (u"и", u"i"),
    (u"й", u"j"),
    (u"к", u"k"),
    (u"л", u"l"),
    (u"м", u"m"),
    (u"н", u"n"),
    (u"о", u"o"),
    (u"п", u"p"),
    (u"р", u"r"),
    (u"с", u"s"),
    (u"т", u"t"),
    (u"у", u"u"),
    (u"ф", u"f"),
    (u"х", u"h"),
    (u"э", u"e"),
    (u"ъ", u"`"),
    (u"ь", u"'"),

    # Identity pairs: latin letters and digits that no pair above
    # produces.  They change nothing when replaced, but they put these
    # characters into the alphabets derived from this table.  "w"/"W"
    # used to be missing here, which made them the only latin letters
    # absent from the derived alphabets.
    (u"c", u"c"),
    (u"q", u"q"),
    (u"y", u"y"),
    (u"x", u"x"),
    (u"w", u"w"),
    (u"C", u"C"),
    (u"Q", u"Q"),
    (u"Y", u"Y"),
    (u"X", u"X"),
    (u"W", u"W"),
    (u"1", u"1"),
    (u"2", u"2"),
    (u"3", u"3"),
    (u"4", u"4"),
    (u"5", u"5"),
    (u"6", u"6"),
    (u"7", u"7"),
    (u"8", u"8"),
    (u"9", u"9"),
    (u"0", u"0"),
)
132
# Alphabets derived from TRANSTABLE, kept in table order:
# the first members of all pairs, the second members of all pairs,
# and both lists joined together.
RU_ALPHABET = [source for source, _target in TRANSTABLE]
EN_ALPHABET = [target for _source, target in TRANSTABLE]
ALPHABET = RU_ALPHABET + EN_ALPHABET
136
137
def translify(in_string):
    """
    Transliterate russian text into latin letters

    Every pair of L{TRANSTABLE} is applied in table order: each occurrence
    of the first member of the pair is replaced by the second member.

    @param in_string: text to transliterate
    @type in_string: C{unicode}

    @return: transliterated text
    @rtype: C{str}

    @raise TypeError: when in_string is not C{unicode}
    @raise ValueError: when the result still holds characters that cannot
        be converted to C{str}, i.e. the text didn't transliterate
        completely
    """
    # The helper is given the argument *name*, not its value -- presumably
    # it looks the value up by that name, so keep the parameter name as is.
    utils.check_type('in_string', unicode)

    latin = in_string
    for ru_symbol, en_symbol in TRANSTABLE:
        latin = latin.replace(ru_symbol, en_symbol)

    # Anything the table did not cover makes the conversion to str fail.
    try:
        return str(latin)
    except UnicodeEncodeError:
        raise ValueError("Unicode string doesn't transliterate completely, "
                         "is it russian?")
164
def detranslify(in_string):
    """
    Turn transliterated (latin) text back into russian

    Every pair of L{TRANSTABLE} is applied in table order, in the reverse
    direction: each occurrence of the second member of the pair is
    replaced by the first member.

    @param in_string: transliterated text
    @type in_string: C{basestring}

    @return: detransliterated text
    @rtype: C{unicode}

    @raise TypeError: when in_string is neither C{str} nor C{unicode}
    @raise ValueError: when in_string is C{str}, but it isn't ascii
    """
    # The helper is given the argument *name*, not its value -- presumably
    # it looks the value up by that name, so keep the parameter name as is.
    utils.check_type('in_string', basestring)

    # Work on unicode from here on; a non-ascii byte string cannot be
    # converted and is reported as a bad value.
    try:
        text = unicode(in_string)
    except UnicodeDecodeError:
        raise ValueError("We expects when in_string is str type,"
                         "it is an ascii, but now it isn't. Use unicode "
                         "in this case.")

    for ru_symbol, en_symbol in TRANSTABLE:
        text = text.replace(en_symbol, ru_symbol)

    return text
192
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    Ampersands become the word "and", runs of whitespace and hyphens
    become a single hyphen, characters outside L{ALPHABET} are dropped,
    the rest is transliterated with L{translify} and lower-cased.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise TypeError: when in_string isn't C{unicode} or C{str}
    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    # The helper is given the argument *name*, not its value -- presumably
    # it looks the value up by that name, so keep the parameter name as is.
    utils.check_type('in_string', basestring)
    try:
        u_in_string = unicode(in_string)
    except UnicodeDecodeError:
        raise ValueError("We expects when in_string is str type," + \
                         "it is an ascii, but now it isn't. Use unicode " + \
                         "in this case.")

    # An ampersand, plain or HTML-escaped as "&amp;", reads as "and".
    # The entity has to be tried first, otherwise its "amp" tail would
    # survive into the slug.
    u_in_string = re.sub(r'&amp;|&', ' and ', u_in_string)

    # Collapse every run of whitespace and hyphens into one hyphen.
    u_in_string = re.sub(r'[-\s]+', '-', u_in_string)

    # Drop every character that is not in the alphabet.
    u_in_string = u''.join([symb for symb in u_in_string if symb in ALPHABET])

    out_string = translify(u_in_string)

    # Transliteration may emit punctuation (quotes, dots, "#", "`");
    # keep only word characters, whitespace and hyphens.
    return re.sub(r'[^\w\s-]', '', out_string).strip().lower()
226
def dirify(in_string):
    """
    Alias for L{slugify}

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string, exactly as returned by L{slugify}
    @rtype: C{str}

    @raise TypeError: when in_string isn't C{unicode} or C{str}
    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    # The result has to be handed back to the caller; without the
    # ``return`` this alias would always yield None.
    return slugify(in_string)
232