Package translate :: Package lang :: Module zh
[hide private]
[frames] | no frames]

Source Code for Module translate.lang.zh

 1  #!/usr/bin/env python 
 2  # -*- coding: utf-8 -*- 
 3  # 
 4  # Copyright 2007 Zuza Software Foundation 
 5  # 
 6  # This file is part of translate. 
 7  # 
 8  # translate is free software; you can redistribute it and/or modify 
 9  # it under the terms of the GNU General Public License as published by 
10  # the Free Software Foundation; either version 2 of the License, or 
11  # (at your option) any later version. 
12  # 
13  # translate is distributed in the hope that it will be useful, 
14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  # GNU General Public License for more details. 
17  # 
18  # You should have received a copy of the GNU General Public License 
19  # along with translate; if not, write to the Free Software 
20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
21   
22  """This module represents the Chinese language (both traditional and simplified).
23   
24  For more information, see U{http://en.wikipedia.org/wiki/Chinese_language} 
25  """ 
26   
27  import re 
28   
29  from translate.lang import common 
30   
31   
class zh(common.Common):
    """This class represents Chinese (both traditional and simplified).

    It defines, on top of ``common.Common``, the class-level settings that
    are specific to Chinese: the list separator, the sentence-ending
    punctuation and the regular expression built from it, a punctuation
    conversion table, a length heuristic and a list of test names to ignore.
    How each setting is consumed is decided by ``common.Common`` and its
    callers -- NOTE(review): confirm there before changing any of them.
    """

    # Separator placed between the items of a list (ideographic comma).
    # NOTE: the attribute keeps its historical spelling because it is part
    # of the class's public interface.
    listseperator = u"、"

    # Characters that may end a sentence: the ideographic full stop, the
    # ASCII and the fullwidth exclamation/question marks, and the ellipsis.
    # The fullwidth forms are written as escapes so that they cannot be
    # confused with (or silently folded into) their ASCII look-alikes.
    sentenceend = u"。!?\uff01\uff1f…"

    # Compared to common.py, we make the space after the sentence ending
    # optional and don't demand an uppercase letter to follow.
    sentencere = re.compile(r"""(?s)    # make . also match newlines
                            .*?         # any text, but match non-greedy
                            [%s]        # the puntuation for sentence ending
                            \s*         # the optional space after the puntuation
                            """ % sentenceend, re.VERBOSE)

    # The following transformation rules should be mostly useful for all
    # types of Chinese. Each key is an ASCII punctuation mark followed by a
    # space or a newline; the value is the corresponding Chinese/fullwidth
    # mark, with the space dropped and the newline preserved.
    #
    # The comma (,) is not handled here, since it maps to two different
    # characters, depending on context. If the comma separates clauses of a
    # sentence, it should be converted to a fullwidth comma (U+FF0C). If it
    # separates list items, as in "apple, orange, grape, .....", the
    # ideographic comma "、" is used.
    puncdict = {
        u". ": u"。",
        u"; ": u"\uff1b",       # fullwidth semicolon
        u": ": u"\uff1a",       # fullwidth colon
        u"! ": u"\uff01",       # fullwidth exclamation mark
        u"? ": u"\uff1f",       # fullwidth question mark
        u".\n": u"。\n",
        u";\n": u"\uff1b\n",
        u":\n": u"\uff1a\n",
        u"!\n": u"\uff01\n",
        u"?\n": u"\uff1f\n",    # keep the newline, like the entries above
        u"% ": u"\uff05",       # fullwidth percent sign
    }

    def length_difference(cls, length):
        """Return ``10 - length // 2`` for the given ``length``.

        Floor division is spelled out so that an integer ``length`` yields
        an integer on every Python version.

        NOTE(review): presumably ``length`` is a source-text length and the
        result an expected length change for the Chinese text -- confirm
        against the callers in ``common.Common``.
        """
        return 10 - length // 2
    length_difference = classmethod(length_difference)

    # Names of tests to ignore for this language; both are about
    # capitalisation.
    ignoretests = ["startcaps", "simplecaps"]
72