1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """This module stores information and functionality that relates to plurals."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
# Plural data per language code.  Every plural equation is stored as a bare
# C-style expression with no terminating ';' so that all entries can be
# consumed the same way.
languages = {
    'af': ('Afrikaans', 2, '(n != 1)'),
    'ak': ('Akan', 2, 'n > 1'),
    'am': ('Amharic', 2, 'n > 1'),
    'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5'),
    'arn': ('Mapudungun; Mapuche', 2, 'n > 1'),
    'az': ('Azerbaijani', 2, '(n != 1)'),
    'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': ('Bulgarian', 2, '(n != 1)'),
    'bn': ('Bengali', 2, '(n != 1)'),
    'bn_IN': ('Bengali (India)', 2, '(n != 1)'),
    'bo': ('Tibetan', 1, '0'),
    'br': ('Breton', 2, 'n > 1'),
    'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': ('Catalan; Valencian', 2, '(n != 1)'),
    'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': ('Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': ('Welsh', 2, '(n==2) ? 1 : 0'),
    'da': ('Danish', 2, '(n != 1)'),
    'de': ('German', 2, '(n != 1)'),
    'dz': ('Dzongkha', 1, '0'),
    'el': ('Greek', 2, '(n != 1)'),
    'en': ('English', 2, '(n != 1)'),
    'en_GB': ('English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': ('English (South Africa)', 2, '(n != 1)'),
    'eo': ('Esperanto', 2, '(n != 1)'),
    'es': ('Spanish; Castilian', 2, '(n != 1)'),
    'et': ('Estonian', 2, '(n != 1)'),
    'eu': ('Basque', 2, '(n != 1)'),
    'fa': ('Persian', 1, '0'),
    'fi': ('Finnish', 2, '(n != 1)'),
    'fil': ('Filipino; Pilipino', 2, '(n > 1)'),
    'fo': ('Faroese', 2, '(n != 1)'),
    'fr': ('French', 2, '(n > 1)'),
    'fur': ('Friulian', 2, '(n != 1)'),
    'fy': ('Frisian', 2, '(n != 1)'),
    'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': ('Galician', 2, '(n != 1)'),
    'gu': ('Gujarati', 2, '(n != 1)'),
    'gun': ('Gun', 2, '(n > 1)'),
    'ha': ('Hausa', 2, '(n != 1)'),
    'he': ('Hebrew', 2, '(n != 1)'),
    'hi': ('Hindi', 2, '(n != 1)'),
    'hy': ('Armenian', 1, '0'),
    'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': ('Hungarian', 2, '(n != 1)'),
    'id': ('Indonesian', 1, '0'),
    'is': ('Icelandic', 2, '(n != 1)'),
    'it': ('Italian', 2, '(n != 1)'),
    'ja': ('Japanese', 1, '0'),
    'jv': ('Javanese', 2, '(n != 1)'),
    'ka': ('Georgian', 1, '0'),
    'km': ('Khmer', 1, '0'),
    'kn': ('Kannada', 2, '(n != 1)'),
    'ko': ('Korean', 1, '0'),
    'ku': ('Kurdish', 2, '(n != 1)'),
    'kw': ('Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': ('Kirghiz; Kyrgyz', 1, '0'),
    'lb': ('Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': ('Lingala', 2, '(n > 1)'),
    'lo': ('Lao', 1, '0'),
    'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': ('Malagasy', 2, '(n > 1)'),
    'mi': ('Maori', 2, '(n > 1)'),
    'mk': ('Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': ('Malayalam', 2, '(n != 1)'),
    'mn': ('Mongolian', 2, '(n != 1)'),
    'mr': ('Marathi', 2, '(n != 1)'),
    'ms': ('Malay', 1, '0'),
    'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': ('Nahuatl languages', 2, '(n != 1)'),
    'nap': ('Neapolitan', 2, '(n != 1)'),
    'nb': ('Norwegian Bokmal', 2, '(n != 1)'),
    'ne': ('Nepali', 2, '(n != 1)'),
    'nl': ('Dutch; Flemish', 2, '(n != 1)'),
    'nn': ('Norwegian Nynorsk', 2, '(n != 1)'),
    'nso': ('Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'or': ('Oriya', 2, '(n != 1)'),
    'pa': ('Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': ('Papiamento', 2, '(n != 1)'),
    'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': ('Piemontese', 2, '(n != 1)'),
    'ps': ('Pushto; Pashto', 2, '(n != 1)'),
    'pt': ('Portuguese', 2, '(n != 1)'),
    'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'),
    # The trailing ';' that used to end this equation has been removed: it
    # was the only entry carrying a statement terminator.
    'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': ('Scots', 2, '(n != 1)'),
    'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': ('Somali', 2, '(n != 1)'),
    'sq': ('Albanian', 2, '(n != 1)'),
    'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': ('Sotho, Southern', 2, '(n != 1)'),
    'su': ('Sundanese', 1, '0'),
    'sv': ('Swedish', 2, '(n != 1)'),
    'sw': ('Swahili', 2, '(n != 1)'),
    'ta': ('Tamil', 2, '(n != 1)'),
    'te': ('Telugu', 2, '(n != 1)'),
    'tg': ('Tajik', 2, '(n != 1)'),
    'ti': ('Tigrinya', 2, '(n > 1)'),
    'th': ('Thai', 1, '0'),
    'tk': ('Turkmen', 2, '(n != 1)'),
    'tr': ('Turkish', 1, '0'),
    'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': ('Vietnamese', 1, '0'),
    'wa': ('Walloon', 2, '(n > 1)'),
    'zh_CN': ('Chinese (China)', 1, '0'),
    'zh_HK': ('Chinese (Hong Kong)', 1, '0'),
    'zh_TW': ('Chinese (Taiwan)', 1, '0'),
    'zu': ('Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.

The language code is the dictionary key (which may contain country codes
and modifiers).
The value is a tuple: (Full name in English, nplurals, plural equation).
The plural equation is a bare expression without a trailing ';'."""
148
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: The language code to simplify.
    @return: C{code} itself when it is empty or C{None}; otherwise C{code}
        cut off before its last separator, or C{""} when the normalized
        code contains no C{'-'} separator.
    """
    if not code:
        return code

    # The separator is located in the normalized code, but the slice is taken
    # from the original so that the caller's spelling is preserved.
    # NOTE(review): this assumes normalize_code() does not change the length
    # of the string - confirm against its definition.
    normalized = normalize_code(code)
    separator = normalized.rfind('-')
    if separator >= 0:
        return code[:separator]
    return ""
168
169
# Relative growth (positive) or shrinkage (negative) of string length when
# going from the source text to the given target language.
expansion_factors = dict([
    ('af', 0.1),
    ('ar', -0.09),
    ('es', 0.21),
    ('fr', 0.28),
    ('it', 0.2),
])
"""Source to target string length expansion factors, keyed by language code."""
178
179 import gettext
180 import locale
181 import re
182 import os
183
# Both dictionaries start out empty and are filled in by code elsewhere in
# this module.
iso639 = dict()
"""Entries for ISO 639 language codes"""
iso3166 = dict()
"""Entries for ISO 3166 country codes"""

# A whole language code: 2-3 lower-case letters, then an optional region
# ('_' or '-' followed by 2-3 upper-case letters), then an optional
# '@modifier'.
langcode_re = re.compile(r"^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the tail of a code: a mandatory region with an optional '@modifier'.
variant_re = re.compile(r"^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
191
193 """matches a languagecode to another, ignoring regions in the second"""
194 if languagecode is None:
195 return langcode_re.match(otherlanguagecode)
196 return languagecode == otherlanguagecode or \
197 (otherlanguagecode.startswith(languagecode) and variant_re.match(otherlanguagecode[len(languagecode):]))
198
# Matches a name that ends in a parenthesised part, e.g. "language (country)":
# group 1 is the text before the whitespace preceding the final "(...)",
# group 2 is the text inside those parentheses.
dialect_name_re = re.compile(r"(.+)\s\(([^)]+)\)$")
200
202 """Gives a function that can translate a language name, even in the form C{"language (country)"},
203 into the language with iso code langcode, or the system language if no language is specified."""
204 langfunc = gettext_lang(langcode)
205 countryfunc = gettext_country(langcode)
206
207 def handlelanguage(name):
208 match = dialect_name_re.match(name)
209 if match:
210 language, country = match.groups()
211 return u"%s (%s)" % (langfunc(language), countryfunc(country))
212 else:
213 return langfunc(name)
214
215 return handlelanguage
216
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    @param langcode: Code of the language to translate into; C{None} or an
        empty string selects the system language.
    @return: The translation function of the C{'iso_639'} gettext domain for
        that language.  The translation objects are created with
        C{fallback=True}, and the function is cached in L{iso639} per
        language code.
    """
    # Fold None and "" into one cache key *before* the lookup.  The system
    # translator is stored under "", so checking the cache with None would
    # never hit and the translation would be rebuilt on every call.
    langcode = langcode or ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the language is taken from the default locale
            # instead of being left to gettext's own lookup.
            t = gettext.translation('iso_639', languages=[locale.getdefaultlocale()[0]], fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext() only exists on Python 2 translation objects; on Python 3
        # gettext() already returns text.
        iso639[langcode] = getattr(t, 'ugettext', None) or t.gettext
    return iso639[langcode]
232
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    @param langcode: Code of the language to translate into; C{None} or an
        empty string selects the system language.
    @return: The translation function of the C{'iso_3166'} gettext domain
        for that language.  The translation objects are created with
        C{fallback=True}, and the function is cached in L{iso3166} per
        language code.
    """
    # Fold None and "" into one cache key *before* the lookup.  The system
    # translator is stored under "", so checking the cache with None would
    # never hit and the translation would be rebuilt on every call.
    langcode = langcode or ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the language is taken from the default locale
            # instead of being left to gettext's own lookup.
            t = gettext.translation('iso_3166', languages=[locale.getdefaultlocale()[0]], fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext() only exists on Python 2 translation objects; on Python 3
        # gettext() already returns text.
        iso3166[langcode] = getattr(t, 'ugettext', None) or t.gettext
    return iso3166[langcode]
248
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or None when C{string} is None
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
260
def forceunicode(string):
    """Ensures that the string is in unicode.

    No Unicode normalization is applied here.

    @param string: A text string
    @type string: Unicode, String
    @return: C{None} for C{None}; byte strings decoded with their
        C{encoding} attribute (UTF-8 when they have none); L{StringElem}
        objects converted to text; anything else returned unchanged.
    @rtype: Unicode
    """
    if string is None:
        return None
    # 'bytes' is an alias of 'str' on Python 2, so this is the same test
    # there, while on Python 3 it selects exactly the objects that can be
    # decoded (text strings have no decode()).
    if isinstance(string, bytes):
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        # type(u"") is 'unicode' on Python 2 and 'str' on Python 3.
        string = type(u"")(string)
    return string
277
279 """Forces the string to unicode and does normalization."""
280 return normalize(forceunicode(string))
281
284
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify.
    @param languages: Mapping whose keys are the known language codes;
        defaults to the module-level L{languages} dictionary.
    @return: The first code, starting from C{language_code} and repeatedly
        simplified with L{simplercode}, whose normalized form equals a
        normalized known code or that cannot be simplified any further.
    """
    # Normalize the known codes once, rather than rebuilding the whole list
    # for every simplification step.
    known_codes = set(normalize_code(key) for key in languages)
    while True:
        simpler = simplercode(language_code)
        if normalize_code(language_code) in known_codes or simpler == "":
            return language_code
        language_code = simpler
294