1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """This module stores information and functionality that relates to plurals."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
languages = {
    'af': (u'Afrikaans', 2, '(n != 1)'),
    'ak': (u'Akan', 2, 'n > 1'),
    'am': (u'Amharic', 2, 'n > 1'),
    'an': (u'Aragonese', 2, '(n != 1)'),
    'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
    'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
    'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, '(n != 1)'),
    'az': (u'Azerbaijani', 2, '(n != 1)'),
    'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': (u'Bulgarian', 2, '(n != 1)'),
    'bn': (u'Bengali', 2, '(n != 1)'),
    'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
    'bo': (u'Tibetan', 1, '0'),
    'br': (u'Breton', 2, 'n > 1'),
    'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
    'ca@valencia': (u'Catalan; Valencian (Valencia)', 2, '(n != 1)'),
    'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
    'da': (u'Danish', 2, '(n != 1)'),
    'de': (u'German', 2, '(n != 1)'),
    'dz': (u'Dzongkha', 1, '0'),
    'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
    'en': (u'English', 2, '(n != 1)'),
    'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
    'eo': (u'Esperanto', 2, '(n != 1)'),
    'es': (u'Spanish; Castilian', 2, '(n != 1)'),
    'et': (u'Estonian', 2, '(n != 1)'),
    'eu': (u'Basque', 2, '(n != 1)'),
    'fa': (u'Persian', 1, '0'),
    'fi': (u'Finnish', 2, '(n != 1)'),
    'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
    'fo': (u'Faroese', 2, '(n != 1)'),
    'fr': (u'French', 2, '(n > 1)'),
    'fur': (u'Friulian', 2, '(n != 1)'),
    'fy': (u'Frisian', 2, '(n != 1)'),
    'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': (u'Galician', 2, '(n != 1)'),
    'gu': (u'Gujarati', 2, '(n != 1)'),
    'gun': (u'Gun', 2, '(n > 1)'),
    'ha': (u'Hausa', 2, '(n != 1)'),
    'he': (u'Hebrew', 2, '(n != 1)'),
    'hi': (u'Hindi', 2, '(n != 1)'),
    'hy': (u'Armenian', 1, '0'),
    'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': (u'Hungarian', 2, '(n != 1)'),
    'id': (u'Indonesian', 1, '0'),
    'is': (u'Icelandic', 2, '(n != 1)'),
    'it': (u'Italian', 2, '(n != 1)'),
    'ja': (u'Japanese', 1, '0'),
    'jv': (u'Javanese', 2, '(n != 1)'),
    'ka': (u'Georgian', 1, '0'),
    'km': (u'Central Khmer', 1, '0'),
    'kn': (u'Kannada', 2, '(n != 1)'),
    'ko': (u'Korean', 1, '0'),
    'ku': (u'Kurdish', 2, '(n != 1)'),
    'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
    'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': (u'Lingala', 2, '(n > 1)'),
    'lo': (u'Lao', 1, '0'),
    'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': (u'Malagasy', 2, '(n > 1)'),
    'mi': (u'Maori', 2, '(n > 1)'),
    'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': (u'Malayalam', 2, '(n != 1)'),
    'mn': (u'Mongolian', 2, '(n != 1)'),
    'mr': (u'Marathi', 2, '(n != 1)'),
    'ms': (u'Malay', 1, '0'),
    'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': (u'Nahuatl languages', 2, '(n != 1)'),
    'nap': (u'Neapolitan', 2, '(n != 1)'),
    'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
    'ne': (u'Nepali', 2, '(n != 1)'),
    'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
    'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
    'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
    'or': (u'Oriya', 2, '(n != 1)'),
    'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': (u'Papiamento', 2, '(n != 1)'),
    'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': (u'Piemontese', 2, '(n != 1)'),
    'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
    'pt': (u'Portuguese', 2, '(n != 1)'),
    'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
    'rm': (u'Romansh', 2, '(n != 1)'),
    # The equation must not carry its own trailing ';' -- no other entry
    # does, and a doubled terminator results if one is appended when the
    # equation is written out.
    'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': (u'Scots', 2, '(n != 1)'),
    'si': (u'Sinhala; Sinhalese', 2, '(n != 1)'),
    'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': (u'Somali', 2, '(n != 1)'),
    'sq': (u'Albanian', 2, '(n != 1)'),
    'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': (u'Sotho, Southern', 2, '(n != 1)'),
    'su': (u'Sundanese', 1, '0'),
    'sv': (u'Swedish', 2, '(n != 1)'),
    'sw': (u'Swahili', 2, '(n != 1)'),
    'ta': (u'Tamil', 2, '(n != 1)'),
    'te': (u'Telugu', 2, '(n != 1)'),
    'tg': (u'Tajik', 2, '(n != 1)'),
    'ti': (u'Tigrinya', 2, '(n > 1)'),
    'th': (u'Thai', 1, '0'),
    'tk': (u'Turkmen', 2, '(n != 1)'),
    'tr': (u'Turkish', 1, '0'),
    'tt': (u'Tatar', 1, '0'),
    'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': (u'Vietnamese', 1, '0'),
    'wa': (u'Walloon', 2, '(n > 1)'),
    'zh_CN': (u'Chinese (China)', 1, '0'),
    'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
    'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
    'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation).
The plural equation is a bare expression in C{n} without a trailing semicolon.

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
159
160 _fixed_names = {
161 u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
162 u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
163 u"Catalan; Valencian": u"Catalan",
164 u"Central Khmer": u"Khmer",
165 u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
166 u"Divehi; Dhivehi; Maldivian": u"Divehi",
167 u"Dutch; Flemish": u"Dutch",
168 u"Filipino; Pilipino": u"Filipino",
169 u"Greek, Modern (1453-)": u"Greek",
170 u"Kirghiz; Kyrgyz": u"Kirghiz",
171 u"Klingon; tlhIngan-Hol": u"Klingon",
172 u"Limburgan; Limburger; Limburgish": u"Limburgish",
173 u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
174 u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
175 u"Ndebele, South; South Ndebele": u"Southern Ndebele",
176 u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
177 u"Occitan (post 1500)": u"Occitan",
178 u"Panjabi; Punjabi": u"Punjabi",
179 u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
180 u"Pushto; Pashto": u"Pashto",
181 u"Sinhala; Sinhalese": u"Sinhala",
182 u"Sotho, Southern": u"Sotho",
183 u"Spanish; Castilian": u"Spanish",
184 u"Uighur; Uyghur": u"Uighur",
185 }
186
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: The language code to simplify.
    @return: C{code} itself when it is empty or None; C{code} with its last
        component removed when the normalized code contains a C{'-'};
        otherwise the empty string.
    """
    if not code:
        return code

    # The separator is located in the normalized form but the slice is
    # taken from the caller's spelling.
    # NOTE(review): this assumes normalize_code() only substitutes
    # characters one-for-one, so offsets agree -- confirm.
    separator = normalize_code(code).rfind('-')
    if separator < 0:
        return ""
    return code[:separator]
206
207
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors, keyed by language code."""
216
217 import gettext
218 import locale
219 import re
220 import os
221
# Both dictionaries start empty; gettext_lang() and gettext_country() store
# one gettext function per language code in them.
iso639 = {}
"""Cache of translation functions for ISO 639 language names"""
iso3166 = {}
"""Cache of translation functions for ISO 3166 country names"""
226
# A complete code: 2-3 lowercase letters, an optional "_XX"/"-XX" region of
# 2-3 capitals, and an optional "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the tail of a code: a mandatory region and an optional "@modifier".
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")


def languagematch(languagecode, otherlanguagecode):
    """matches a languagecode to another, ignoring regions in the second

    @param languagecode: The code to match against, or None to only check
        that C{otherlanguagecode} looks like a language code.
    @param otherlanguagecode: The code being tested; it may carry a region
        (and modifier) that C{languagecode} lacks.
    @return: A true value on a match and a false one otherwise. The result
        is not always a bool: it can be a regular expression match object
        or None.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    # Whatever follows the shared prefix has to be a region/modifier
    # suffix, so that "af" matches "af_ZA" but not "afr".
    remainder = otherlanguagecode[len(languagecode):]
    return otherlanguagecode.startswith(languagecode) and variant_re.match(remainder)
236
# Splits a name of the form "Language (Country)". The parenthesised part may
# not contain a digit, so "Greek, Modern (1453-)" is not split.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]+)\)$")


def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form C{"language (country)"},
    into the language with iso code langcode, or the system language if no language is specified.

    @param langcode: Code of the language to translate names into.
    @return: A function taking a language name and returning its translated
        form, passed through _fix_language_name().
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        match = dialect_name_re.match(name)
        if match is None:
            return _fix_language_name(langfunc(name))
        # The language and the country are translated separately and then
        # put back together in the same "language (country)" layout.
        language, country = match.groups()
        return u"%s (%s)" % (_fix_language_name(langfunc(language)), countryfunc(country))

    return handlelanguage
254
def _fix_language_name(name):
    """Identify and replace some unsightly names present in iso-codes.

    If the name is present in _fixed_names we assume it is untranslated and
    we replace it with a more usable rendering.

    @param name: A language name.
    @return: The replacement from _fixed_names, or C{name} unchanged when
        there is none.
    """
    return _fixed_names.get(name, name)
261
262
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    @param langcode: Code of the target language; None or an empty string
        selects the system language.
    @return: The translation function of the C{iso_639} gettext domain for
        that language. Results are cached in C{iso639}, with the system
        language stored under the key C{""}.
    """
    # Fold None into "" before looking at the cache. The entry is stored
    # under "", so testing membership with None would never find it and the
    # catalogue would be reloaded on every call.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # NOTE(review): presumably needed because gettext only consults
            # the LANGUAGE/LC_*/LANG environment variables, which are
            # usually unset on Windows -- confirm.
            system_locale = locale.getdefaultlocale()[0]
            if system_locale:
                t = gettext.translation('iso_639', languages=[system_locale], fallback=True)
            else:
                # No default locale could be determined; do not hand
                # [None] to gettext.
                t = gettext.translation('iso_639', fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns unicode.
        iso639[langcode] = getattr(t, "ugettext", t.gettext)
    return iso639[langcode]
278
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    @param langcode: Code of the target language; None or an empty string
        selects the system language.
    @return: The translation function of the C{iso_3166} gettext domain for
        that language. Results are cached in C{iso3166}, with the system
        language stored under the key C{""}.
    """
    # Fold None into "" before looking at the cache. The entry is stored
    # under "", so testing membership with None would never find it and the
    # catalogue would be reloaded on every call.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # NOTE(review): presumably needed because gettext only consults
            # the LANGUAGE/LC_*/LANG environment variables, which are
            # usually unset on Windows -- confirm.
            system_locale = locale.getdefaultlocale()[0]
            if system_locale:
                t = gettext.translation('iso_3166', languages=[system_locale], fallback=True)
            else:
                # No default locale could be determined; do not hand
                # [None] to gettext.
                t = gettext.translation('iso_3166', fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns unicode.
        iso3166[langcode] = getattr(t, "ugettext", t.gettext)
    return iso3166[langcode]
294
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or None when C{string} is None
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
306
def forceunicode(string):
    """Ensures that the string is in unicode.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    if string is None:
        return None
    # bytes is an alias of str on Python 2.6+, so this is the same check
    # there; on Python 3 it picks out exactly the values that need decoding.
    if isinstance(string, bytes):
        # Honour an "encoding" attribute when the object provides one.
        encoding = getattr(string, "encoding", "utf-8")
        return string.decode(encoding)
    if isinstance(string, StringElem):
        # type(u"") is unicode on Python 2 and str on Python 3.
        return type(u"")(string)
    return string
323
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: The value to convert; it is passed to forceunicode() and
        the result to normalize().
    @return: Whatever normalize() returns for the converted value.
    """
    return normalize(forceunicode(string))
327
330
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify.
    @param languages: Dictionary whose keys are the known language codes;
        defaults to the module level table.
    @return: The first of C{language_code} and its successive
        simplifications that is a known code or cannot be simplified
        further.
    """
    # Normalize the known codes once rather than at every simplification
    # step.
    known_codes = set(normalize_code(key) for key in languages)

    # The loop replaces a recursive call which did not hand C{languages}
    # on, so a caller supplied table was ignored after the first step.
    while True:
        simpler = simplercode(language_code)
        if normalize_code(language_code) in known_codes or simpler == "":
            return language_code
        language_code = simpler
340