1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .po files (pounit) or entire files (pofile).
23
24 Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
25 many other projects.
26
27 This uses libgettextpo from the gettext package. Any version before 0.17 will
28 at least cause some subtle bugs or may not work at all. Developers might want
29 to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
30 package for the public API of the library.
31 """
32
33 from ctypes import c_size_t, c_int, c_uint, c_char_p, c_long, CFUNCTYPE, POINTER
34 from ctypes import Structure, cdll
35 import ctypes.util
36 import os
37 import re
38 import sys
39 import tempfile
40 import urllib
41
42 from translate.lang import data
43 from translate.misc.multistring import multistring
44 from translate.storage import base, pocommon
45 from translate.storage import pypo
46 from translate.storage.pocommon import encodingToUse
47
48 lsep = " "
49 """Separator for #: entries"""
50
51 STRING = c_char_p
52
53
54
57
58
# C callback signatures for the two members of libgettextpo's error-handler
# structure.  gettext-po.h declares the line-number and column parameters as
# ``size_t``, so they are described with ``c_size_t`` here (the same type this
# module already uses for the line argument of ``po_message_add_filepos``);
# ``c_uint`` is narrower than ``size_t`` on LP64 platforms.
xerror_prototype = CFUNCTYPE(
    None,                   # void return
    c_int,                  # severity
    POINTER(po_message),    # message
    STRING,                 # filename
    c_size_t,               # lineno
    c_size_t,               # column
    c_int,                  # multiline_p
    STRING,                 # message_text
)
# Same as above, but carrying two (message, filename, lineno, column,
# multiline_p, message_text) groups after the shared severity argument.
xerror2_prototype = CFUNCTYPE(
    None,                   # void return
    c_int,                  # severity
    POINTER(po_message),    # message1
    STRING,                 # filename1
    c_size_t,               # lineno1
    c_size_t,               # column1
    c_int,                  # multiline_p1
    STRING,                 # message_text1
    POINTER(po_message),    # message2
    STRING,                 # filename2
    c_size_t,               # lineno2
    c_size_t,               # column2
    c_int,                  # multiline_p2
    STRING,                 # message_text2
)
61
62
63
67
68
70 _fields_ = [
71 ('error', CFUNCTYPE(None, c_int, c_int, STRING)),
72 ('error_at_line', CFUNCTYPE(None, c_int, c_int, STRING, c_uint, STRING)),
73 ('multiline_warning', CFUNCTYPE(None, STRING, STRING)),
74 ('multiline_error', CFUNCTYPE(None, STRING, STRING)),
75 ]
76
77
78
def xerror_cb(severity, message, filename, lineno, column, multilint_p, message_text):
    """Report a single-location problem and raise for non-zero severities.

    Installed as the ``xerror`` member of the module's error handler.  All
    arguments are written to ``sys.stderr`` on one space-separated line
    prefixed with ``xerror_cb``.

    The parameter name ``multilint_p`` (sic) is kept as-is so the signature
    stays unchanged.

    @raise ValueError: with C{message_text} when C{severity} is 1 or greater
        (presumably "error" and above in libgettextpo's severity scale --
        confirm against gettext-po.h).
    """
    details = (severity, message, filename, lineno, column, multilint_p,
               message_text)
    # A plain stream write produces the same line the old ``print >>``
    # statement did, without relying on Python-2-only syntax.
    sys.stderr.write("xerror_cb %s\n" % " ".join([str(item) for item in details]))
    if severity >= 1:
        raise ValueError(message_text)
83
84
def xerror2_cb(severity, message1, filename1, lineno1, column1, multiline_p1, message_text1, message2, filename2, lineno2, column2, multiline_p2, message_text2):
    """Report a two-location problem and raise for non-zero severities.

    Installed as the ``xerror2`` member of the module's error handler.  All
    arguments are written to ``sys.stderr`` on one space-separated line
    prefixed with ``xerror2_cb``.

    @raise ValueError: with C{message_text1} when C{severity} is 1 or
        greater; C{message_text2} is only logged, never raised.
    """
    details = (severity,
               message1, filename1, lineno1, column1, multiline_p1, message_text1,
               message2, filename2, lineno2, column2, multiline_p2, message_text2)
    # A plain stream write produces the same line the old ``print >>``
    # statement did, without relying on Python-2-only syntax.
    sys.stderr.write("xerror2_cb %s\n" % " ".join([str(item) for item in details]))
    if severity >= 1:
        raise ValueError(message_text1)
89
90
91
# Locate and load libgettextpo; ``gpo`` is the handle every wrapper in this
# module calls through.
gpo = None

# Try both spellings of the library name; which one find_library() resolves
# presumably depends on the platform.
names = ['gettextpo', 'libgettextpo']
for name in names:
    lib_location = ctypes.util.find_library(name)
    if lib_location:
        gpo = cdll.LoadLibrary(lib_location)
        if gpo:
            break
else:
    # The loop finished without ``break``: neither name produced a loaded
    # library.  As a last resort ask the dynamic loader directly for the
    # conventional shared-object file name.
    try:
        gpo = cdll.LoadLibrary('libgettextpo.so')
    except OSError:
        # Surface the failure as an import problem so that importing this
        # module fails cleanly when the library is unavailable.
        raise ImportError("gettext PO library not found")
109
110
111
# Declare argument and return types for the libgettextpo entry points used by
# this module.  Without a ``restype`` ctypes assumes a C ``int`` return value,
# so every function returning ``char *`` must be marked as STRING explicitly.
#
# NOTE(review): handles (files, messages, file positions) are declared as
# ``c_int`` throughout; these are pointers on the C side, so this presumably
# only works where pointers fit in a C int -- confirm before relying on it.

# File reading/writing
gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
gpo.po_file_write_v2.argtypes = [c_int, STRING, POINTER(po_xerror_handler)]
# The attribute is ``restype``; the former spelling ``retype`` merely created
# an unused attribute and was silently ignored by ctypes.
gpo.po_file_write_v2.restype = c_int

# Header access
gpo.po_file_domain_header.restype = STRING
gpo.po_header_field.restype = STRING
gpo.po_header_field.argtypes = [STRING, STRING]

# File positions (locations)
gpo.po_filepos_file.restype = STRING
gpo.po_message_filepos.restype = c_int
gpo.po_message_filepos.argtypes = [c_int, c_int]
gpo.po_message_add_filepos.argtypes = [c_int, STRING, c_size_t]

# Message getters
gpo.po_message_comments.restype = STRING
gpo.po_message_extracted_comments.restype = STRING
gpo.po_message_prev_msgctxt.restype = STRING
gpo.po_message_prev_msgid.restype = STRING
gpo.po_message_prev_msgid_plural.restype = STRING
gpo.po_message_is_format.restype = c_int
gpo.po_message_is_format.argtypes = [c_int, STRING]
gpo.po_message_set_format.argtypes = [c_int, STRING, c_int]
gpo.po_message_msgctxt.restype = STRING
gpo.po_message_msgid.restype = STRING
gpo.po_message_msgid_plural.restype = STRING
gpo.po_message_msgstr.restype = STRING
gpo.po_message_msgstr_plural.restype = STRING

# Message setters
gpo.po_message_set_comments.argtypes = [c_int, STRING]
gpo.po_message_set_extracted_comments.argtypes = [c_int, STRING]
gpo.po_message_set_fuzzy.argtypes = [c_int, c_int]
gpo.po_message_set_msgctxt.argtypes = [c_int, STRING]

# Module-wide error handler passed to the read/write calls.  The wrapped
# callbacks are stored on the structure, which also keeps the ctypes function
# objects referenced for as long as ``xerror_handler`` exists.
xerror_handler = po_xerror_handler()
xerror_handler.xerror = xerror_prototype(xerror_cb)
xerror_handler.xerror2 = xerror2_prototype(xerror2_cb)
152
153
156
157
160
161
164
165
def get_libgettextpo_version():
    """Returns the libgettextpo version

    The library exports a single integer, C{libgettextpo_version}, which this
    function unpacks as C{(major << 16) + (minor << 8) + subminor}.

    @rtype: three-value tuple
    @return: libgettextpo version in the following format::
        (major version, minor version, subminor version)
    """
    # NOTE(review): gettext-po.h appears to declare this symbol as ``int``;
    # reading it as ``c_long`` is kept from the original -- confirm.
    libversion = c_long.in_dll(gpo, 'libgettextpo_version')
    packed = libversion.value
    major = packed >> 16
    # Mask to one byte: without the mask the major bits leak into ``minor``
    # (and make ``subminor`` negative) as soon as major is non-zero.
    minor = (packed >> 8) & 0xff
    subminor = packed & 0xff
    return major, minor, subminor
178
179
180 -class pounit(pocommon.pounit):
181
def __init__(self, source=None, encoding='utf-8', gpo_message=None):
    """Create a unit, either fresh or wrapping an existing library message.

    @param source: initial source text; when given (including the empty
        string) it is assigned and the target is reset to "".
    @param encoding: encoding used for text exchanged with the library;
        any false value falls back to 'utf-8'.
    @param gpo_message: an existing libgettextpo message handle to wrap;
        when false, a new message is created through the library.
    """
    self._rich_source = self._rich_target = None
    self._encoding = encoding if encoding else 'utf-8'

    # No handle supplied: allocate a brand-new message.
    if not gpo_message:
        self._gpo_message = gpo.po_message_create()

    # An explicit empty string still counts as "source supplied".
    source_given = bool(source) or source == ""
    if source_given:
        self.source = source
        self.target = ""
    elif gpo_message:
        self._gpo_message = gpo_message

    self.infer_state()
194
205
210 msgid_plural = property(None, setmsgid_plural)
211
213
def remove_msgid_comments(text):
    """Drop a leading ``_:`` comment line from *text*.

    Text that is empty/None or does not start with ``_:`` is returned
    unchanged.  Otherwise the remainder of the line following the first
    ``_: ...`` line is returned, or u"" when no such line exists.
    """
    if not text or not text.startswith("_:"):
        return text
    found = re.search(r"_: .*\n(.*)", text)
    return found.group(1) if found else u""
225 singular = remove_msgid_comments((gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding))
226 if singular:
227 if self.hasplural():
228 multi = multistring(singular, self._encoding)
229 pluralform = (gpo.po_message_msgid_plural(self._gpo_message) or "").decode(self._encoding)
230 multi.strings.append(pluralform)
231 return multi
232 else:
233 return singular
234 else:
235 return u""
236
249 source = property(getsource, setsource)
250
252 if self.hasplural():
253 plurals = []
254 nplural = 0
255 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
256 while plural:
257 plurals.append(plural.decode(self._encoding))
258 nplural += 1
259 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
260 if plurals:
261 multi = multistring(plurals, encoding=self._encoding)
262 else:
263 multi = multistring(u"")
264 else:
265 multi = (gpo.po_message_msgstr(self._gpo_message) or "").decode(self._encoding)
266 return multi
267
269
270 if self.hasplural():
271 if isinstance(target, multistring):
272 target = target.strings
273 elif isinstance(target, basestring):
274 target = [target]
275
276 elif isinstance(target, (dict, list)):
277 if len(target) == 1:
278 target = target[0]
279 else:
280 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
281
282
283
284
285
286 if isinstance(target, (dict, list)):
287 i = 0
288 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
289 while message is not None:
290 gpo.po_message_set_msgstr_plural(self._gpo_message, i, None)
291 i += 1
292 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
293
294 if isinstance(target, list):
295 for i in range(len(target)):
296 targetstring = target[i]
297 if isinstance(targetstring, unicode):
298 targetstring = targetstring.encode(self._encoding)
299 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
300
301 elif isinstance(target, dict):
302 for i, targetstring in enumerate(target.itervalues()):
303 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
304
305 else:
306 if isinstance(target, unicode):
307 target = target.encode(self._encoding)
308 if target is None:
309 gpo.po_message_set_msgstr(self._gpo_message, "")
310 else:
311 gpo.po_message_set_msgstr(self._gpo_message, target)
312 target = property(gettarget, settarget)
313
315 """The unique identifier for this unit according to the conventions in
316 .mo files."""
317 id = (gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding)
318
319
320
321
322
323
324
325 context = gpo.po_message_msgctxt(self._gpo_message)
326 if context:
327 id = u"%s\04%s" % (context.decode(self._encoding), id)
328 return id
329
331 if origin == None:
332 comments = gpo.po_message_comments(self._gpo_message) + \
333 gpo.po_message_extracted_comments(self._gpo_message)
334 elif origin == "translator":
335 comments = gpo.po_message_comments(self._gpo_message)
336 elif origin in ["programmer", "developer", "source code"]:
337 comments = gpo.po_message_extracted_comments(self._gpo_message)
338 else:
339 raise ValueError("Comment type not valid")
340
341 if comments and get_libgettextpo_version() < (0, 17, 0):
342 comments = "\n".join([line for line in comments.split("\n")])
343
344 return comments[:-1].decode(self._encoding)
345
def addnote(self, text, origin=None, position="append"):
    """Add a note (comment) to the unit.

    @param text: the note; empty or whitespace-only text is ignored.
    @param origin: "programmer", "developer" or "source code" store the
        result as extracted comments; any other value stores it as
        (translator) comments.  The existing notes are fetched with
        C{self.getnotes(origin)}.
    @param position: "append" puts the text after the existing notes,
        "merge" appends only lines that are not already contained in them
        (lines shorter than 5 characters are always appended), and any
        other value puts the text before the existing notes.
    """
    if not text or not text.strip():
        return
    text = data.forceunicode(text)
    existing = self.getnotes(origin)

    combined = None
    if not existing:
        # Nothing to combine with: just normalise the line endings.
        combined = "\n".join([piece.rstrip("\r") for piece in text.split("\n")])
    elif position == "append":
        combined = existing + "\n" + text
    elif position == "merge":
        if existing != text:
            merged_lines = existing.split("\n")
            for candidate in text.split("\n"):
                candidate = candidate.rstrip("\r")
                # Substring test against the whole existing text avoids
                # duplicating lines; very short lines are always added.
                if len(candidate) < 5 or candidate not in existing:
                    merged_lines.append(candidate)
            combined = "\n".join(merged_lines)
    else:
        combined = text + '\n' + existing

    if not combined:
        return

    # Library versions before 0.17 get a leading space on non-empty lines.
    pad = get_libgettextpo_version() < (0, 17, 0)
    padded = [(" " + line) if (line and pad) else line
              for line in combined.split("\n")]
    encoded = "\n".join(padded).encode(self._encoding)
    if origin in ["programmer", "developer", "source code"]:
        gpo.po_message_set_extracted_comments(self._gpo_message, encoded)
    else:
        gpo.po_message_set_comments(self._gpo_message, encoded)
383
385 gpo.po_message_set_comments(self._gpo_message, "")
386
388 newpo = self.__class__()
389 newpo._gpo_message = self._gpo_message
390 return newpo
391
392 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
426
428
429
430 return self.getid() == "" and len(self.target) > 0
431
434
437
440
447
448
449
450
451
452
453
455 gpo.po_message_set_fuzzy(self._gpo_message, present)
456
458
459
460 gpo.po_message_set_obsolete(self._gpo_message, True)
461 self.infer_state()
462
464 gpo.po_message_set_obsolete(self._gpo_message, False)
465 self.infer_state()
466
468 return gpo.po_message_msgid_plural(self._gpo_message) is not None
469
481
485 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
486
488 pf = pofile(noheader=True)
489 pf.addunit(self)
490 return str(pf)
491
493 locations = []
494 i = 0
495 location = gpo.po_message_filepos(self._gpo_message, i)
496 while location:
497 locname = gpo.po_filepos_file(location)
498 locline = gpo.po_filepos_start_line(location)
499 if locline == -1:
500 locstring = locname
501 else:
502 locstring = locname + ":" + str(locline)
503 locations.append(urllib.unquote_plus(locstring))
504 i += 1
505 location = gpo.po_message_filepos(self._gpo_message, i)
506 return locations
507
509 if location.find(" ") != -1:
510 location = urllib.quote_plus(location)
511 parts = location.split(":")
512 file = parts[0]
513 if len(parts) == 2:
514 line = int(parts[1] or "0")
515 else:
516 line = -1
517 gpo.po_message_add_filepos(self._gpo_message, file, line)
518
def getcontext(self):
    """Return the unit's context.

    The library's msgctxt is used when it is set (decoded with the unit's
    encoding); otherwise the result of C{self._extract_msgidcomments()} is
    returned.
    """
    raw_context = gpo.po_message_msgctxt(self._gpo_message)
    if not raw_context:
        return self._extract_msgidcomments()
    return raw_context.decode(self._encoding)
526
def setcontext(self, context):
    """Set the unit's msgctxt.

    @param context: the new context; it is normalised with
        C{data.forceunicode} and then encoded with the unit's encoding
        before being handed to the library.
    """
    context = data.forceunicode(context)
    # The library takes a C string.  Encode explicitly with the unit's
    # encoding (as settarget does for msgstr) instead of leaving the
    # conversion to ctypes, which would not use this unit's encoding and
    # can fail on non-ASCII text.
    if isinstance(context, unicode):
        context = context.encode(self._encoding)
    gpo.po_message_set_msgctxt(self._gpo_message, context)
530
565 buildfromunit = classmethod(buildfromunit)
566
567
568 -class pofile(pocommon.pofile):
569 UnitClass = pounit
570
def __init__(self, inputfile=None, encoding=None, unitclass=pounit, noheader=False):
    """Create a store, empty or loaded from C{inputfile}.

    @param inputfile: when not None, construction is delegated to the base
        class with this input and C{encoding}.
    @param encoding: forwarded to the base class together with
        C{inputfile}; the store's own C{_encoding} starts out as 'utf-8'.
    @param unitclass: accepted but not used by this constructor.
    @param noheader: for an empty store, skip the C{init_headers()} call.
    """
    self._gpo_memory_file = None
    self._gpo_message_iterator = None
    self.units = []
    self.sourcelanguage = None
    self.targetlanguage = None
    self._encoding = 'utf-8'

    if inputfile is not None:
        super(pofile, self).__init__(inputfile=inputfile, encoding=encoding)
        return

    # No input: start from an empty in-memory file with an iterator on it.
    memory_file = gpo.po_file_create()
    self._gpo_memory_file = memory_file
    self._gpo_message_iterator = gpo.po_message_iterator(memory_file, None)
    if not noheader:
        self.init_headers()
585
def addunit(self, unit, new=True):
    """Add C{unit} to the store.

    @param unit: the unit to add.
    @param new: when true, the unit's message is also inserted into the
        underlying library file through the store's message iterator; pass
        False for messages that are already part of that file.
    """
    needs_insert = bool(new)
    if needs_insert:
        iterator = self._gpo_message_iterator
        gpo.po_message_insert(iterator, unit._gpo_message)
    super(pofile, self).addunit(unit)
590
592 header._store = self
593 self.units.insert(0, header)
594 gpo.po_message_iterator_free(self._gpo_message_iterator)
595 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
596 gpo.po_message_insert(self._gpo_message_iterator, header._gpo_message)
597 while gpo.po_next_message(self._gpo_message_iterator):
598 pass
599
601 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
602
603
604 id_dict = {}
605 uniqueunits = []
606
607
608 markedpos = []
609
def addcomment(thepo):
    """Set the unit's msgidcomment to its space-joined locations and
    record the unit in C{markedpos}."""
    joined_locations = " ".join(thepo.getlocations())
    thepo.msgidcomment = joined_locations
    markedpos.append(thepo)
613 for thepo in self.units:
614 id = thepo.getid()
615 if thepo.isheader() and not thepo.getlocations():
616
617 uniqueunits.append(thepo)
618 elif id in id_dict:
619 if duplicatestyle == "merge":
620 if id:
621 id_dict[id].merge(thepo)
622 else:
623 addcomment(thepo)
624 uniqueunits.append(thepo)
625 elif duplicatestyle == "msgctxt":
626 origpo = id_dict[id]
627 if origpo not in markedpos:
628 gpo.po_message_set_msgctxt(origpo._gpo_message, " ".join(origpo.getlocations()))
629 markedpos.append(thepo)
630 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
631 uniqueunits.append(thepo)
632 else:
633 if not id:
634 if duplicatestyle == "merge":
635 addcomment(thepo)
636 else:
637 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
638 id_dict[id] = thepo
639 uniqueunits.append(thepo)
640 new_gpo_memory_file = gpo.po_file_create()
641 new_gpo_message_iterator = gpo.po_message_iterator(new_gpo_memory_file, None)
642 for unit in uniqueunits:
643 gpo.po_message_insert(new_gpo_message_iterator, unit._gpo_message)
644 gpo.po_message_iterator_free(self._gpo_message_iterator)
645 self._gpo_message_iterator = new_gpo_message_iterator
646 self._gpo_memory_file = new_gpo_memory_file
647 self.units = uniqueunits
648
650
def obsolete_workaround():
    """Strip extracted comments and all file positions from every
    obsolete unit of the enclosing store."""
    for entry in self.units:
        if not entry.isobsolete():
            continue
        handle = entry._gpo_message
        gpo.po_message_set_extracted_comments(handle, "")
        # Keep removing the first position until none is left.
        while gpo.po_message_filepos(handle, 0):
            gpo.po_message_remove_filepos(handle, 0)
662 outputstring = ""
663 if self._gpo_memory_file:
664 obsolete_workaround()
665 f, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
666 os.close(f)
667 self._gpo_memory_file = gpo.po_file_write_v2(self._gpo_memory_file, fname, xerror_handler)
668 f = open(fname)
669 outputstring = f.read()
670 f.close()
671 os.remove(fname)
672 return outputstring
673
675 """Returns True if the object doesn't contain any translation units."""
676 if len(self.units) == 0:
677 return True
678
679 if self.units[0].isheader():
680 units = self.units[1:]
681 else:
682 units = self.units
683
684 for unit in units:
685 if not unit.isblank() and not unit.isobsolete():
686 return False
687 return True
688
690 if hasattr(input, 'name'):
691 self.filename = input.name
692 elif not getattr(self, 'filename', ''):
693 self.filename = ''
694
695 if hasattr(input, "read"):
696 posrc = input.read()
697 input.close()
698 input = posrc
699
700 needtmpfile = not os.path.isfile(input)
701 if needtmpfile:
702
703 fd, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
704 os.write(fd, input)
705 input = fname
706 os.close(fd)
707
708 self._gpo_memory_file = gpo.po_file_read_v3(input, xerror_handler)
709 if self._gpo_memory_file is None:
710 print >> sys.stderr, "Error:"
711
712 if needtmpfile:
713 os.remove(input)
714
715 self.units = []
716
717 self._header = gpo.po_file_domain_header(self._gpo_memory_file, None)
718 if self._header:
719 charset = gpo.po_header_field(self._header, "Content-Type")
720 if charset:
721 charset = re.search("charset=([^\\s]+)", charset).group(1)
722 self._encoding = encodingToUse(charset)
723 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
724 newmessage = gpo.po_next_message(self._gpo_message_iterator)
725 while newmessage:
726 newunit = pounit(gpo_message=newmessage, encoding=self._encoding)
727 self.addunit(newunit, new=False)
728 newmessage = gpo.po_next_message(self._gpo_message_iterator)
729 self._free_iterator()
730
732
733
734 return
735 self._free_iterator()
736 if self._gpo_memory_file is not None:
737 gpo.po_file_free(self._gpo_memory_file)
738 self._gpo_memory_file = None
739
741
742
743 return
744 if self._gpo_message_iterator is not None:
745 gpo.po_message_iterator_free(self._gpo_message_iterator)
746 self._gpo_message_iterator = None
747