1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base, poparser
30 from translate.storage.pocommon import encodingToUse
31 import re
32 import copy
33 import cStringIO
34 import urllib
35
36 lsep = "\n#: "
"""Separator for #: entries"""
38
39
40
# Maps each escape sequence found in a .po file to the character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# Inverse of po_unescape_map: maps each special character to its escape sequence.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])


def escapeforpo(line):
    """Escapes a line for po format. Assumes no newline occurs in the line.

    Every character that is a key of C{po_escape_map} is replaced by the
    corresponding escape sequence; all other characters are copied through
    unchanged.

    @param line: unescaped text
    @return: the escaped text
    """
    # Every key of po_escape_map is a single character, so one pass with a
    # per-character lookup replaces the former scan-per-key, sort and
    # repeated string concatenation.
    return "".join([po_escape_map.get(char, char) for char in line])
62
66
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without touching its whitespace.
    A space that ends up at the start of a continuation line is moved to
    the end of the line before it.

    @param line: the text to wrap
    @return: list of wrapped pieces
    """
    pieces = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space: hand the leading space of each
    # continuation line back to its predecessor.
    for pos in range(1, len(pieces)):
        if pieces[pos].startswith(' '):
            pieces[pos] = pieces[pos][1:]
            pieces[pos - 1] += ' '
    return pieces
81
def quoteforpo(text):
    """quotes the given text for a PO file, returning quoted and escaped lines

    @param text: unescaped text, or None
    @return: list of double-quoted, escaped lines (empty when text is None)
    """
    quoted = []
    if text is None:
        return quoted
    paragraphs = text.split("\n")
    final = paragraphs[-1]
    if len(paragraphs) > 1 or len(paragraphs[0]) > 71:
        # Start with an empty first line, except for a single line that
        # merely ends in a newline.
        if len(paragraphs) != 2 or paragraphs[1]:
            quoted.append('""')
        for paragraph in paragraphs[:-1]:
            wrapped = wrapline(paragraph)
            if not wrapped:
                # an empty paragraph is just its newline
                quoted.append('"\\n"')
                continue
            quoted.extend(['"' + escapeforpo(piece) + '"' for piece in wrapped[:-1]])
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
    if final:
        quoted.extend(['"' + escapeforpo(piece) + '"' for piece in wrapline(final)])
    return quoted
104
106 """Remove quote and unescape line from po file.
107
108 @param line: a quoted line from a po file (msgid or msgstr)
109 """
110 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
111 return extracted
112
115
def is_null(lst):
    """Tell whether a list of quoted po lines carries no text.

    @param lst: list of quoted lines
    @return: True for an empty list, or a list holding only the empty
        quoted string
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
118
120 left = string.find('"')
121 right = string.rfind('"')
122 if right > -1:
123 return string[left:right+1]
124 else:
125 return string[left:] + '"'
126
127 -class pounit(pocommon.pounit):
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142 __shallow__ = ['_store']
143
def __init__(self, source=None, encoding="UTF-8"):
    """Create a unit with every message part empty, then hand the source
    to the parent class initialiser.

    @param source: passed on to pocommon.pounit.__init__
    @param encoding: encoding name, normalised through encodingToUse
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Each message part gets its own fresh, empty list.
    for part in ("prev_msgctxt", "prev_msgid", "prev_msgid_plural",
                 "msgctxt", "msgid", "msgid_pluralcomments",
                 "msgid_plural", "msgstr",
                 "obsoletemsgctxt", "obsoletemsgid",
                 "obsoletemsgid_pluralcomments", "obsoletemsgid_plural",
                 "obsoletemsgstr"):
        setattr(self, part, [])
    pocommon.pounit.__init__(self, source)
162
172
180
181 allcomments = property(_get_all_comments)
182
191
209
213
215 """Sets the msgid to the given (unescaped) value.
216
217 @param source: an unescaped source string.
218 """
219 self._rich_source = None
220 self.msgid, self.msgid_plural = self._set_source_vars(source)
221 source = property(getsource, setsource)
222
224 """Returns the unescaped msgid"""
225 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
226
228 """Sets the msgid to the given (unescaped) value.
229
230 @param source: an unescaped source string.
231 """
232 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
233 prev_source = property(_get_prev_source, _set_prev_source)
234
242
def settarget(self, target):
    """Sets the msgstr to the given (unescaped) value

    For a unit with a plural, a multistring or a single string is turned
    into a list of strings. For a unit without a plural, a one-element
    list or dict is reduced to its element at index/key 0.

    @param target: string, multistring, list or dict of unescaped strings
    @raise ValueError: if the unit has no plural but target is a list or
        dict whose length is not 1
    """
    self._rich_target = None
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if self.hasplural():
        # multistring must be tested before basestring so that all of its
        # strings are kept rather than wrapping it as a single string.
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) == 1:
            target = target[0]
        else:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
    if isinstance(target, list):
        self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
    else:
        self.msgstr = quoteforpo(target)
267 target = property(gettarget, settarget)
268
270 """Return a list of alternate units.
271
272 Previous msgid and current msgstr is combined to form a single
273 alternative unit."""
274 prev_source = self.prev_source
275 if prev_source and self.isfuzzy():
276 unit = type(self)(prev_source)
277 unit.target = self.target
278
279
280
281 unit.xmlelement = dict()
282 return [unit]
283 return []
284
286 """Return comments based on origin value (programmer, developer, source code and translator)"""
287 if origin == None:
288 comments = u"".join([comment[2:] for comment in self.othercomments])
289 comments += u"".join([comment[3:] for comment in self.automaticcomments])
290 elif origin == "translator":
291 comments = u"".join ([comment[2:] for comment in self.othercomments])
292 elif origin in ["programmer", "developer", "source code"]:
293 comments = u"".join([comment[3:] for comment in self.automaticcomments])
294 else:
295 raise ValueError("Comment type not valid")
296
297 return comments[:-1]
298
def addnote(self, text, origin=None, position="append"):
    """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note; empty or whitespace-only text is ignored
    @param origin: "programmer", "developer" or "source code" store the
        note as an automatic ("#. ") comment; anything else stores it as
        a translator ("# ") comment
    @param position: "append" or "prepend" relative to the existing
        comments; any other value makes the new note replace them
    """
    # ignore empty strings and strings without non-space characters
    if not text or not text.strip():
        return
    text = data.forceunicode(text)

    is_automatic = origin in ["programmer", "developer", "source code"]
    if is_automatic:
        existing = self.automaticcomments
        marker = "#. "
    else:
        existing = self.othercomments
        marker = "# "

    added = [marker + line + "\n" for line in text.split("\n")]
    if position == "append":
        added = existing + added
    elif position == "prepend":
        added = added + existing

    if is_automatic:
        self.automaticcomments = added
    else:
        self.othercomments = added
323
325 """Remove all the translator's notes (other comments)"""
326 self.othercomments = []
327
329
330 new_unit = self.__class__()
331
332
333 shallow = set(self.__shallow__)
334
335 for key, value in self.__dict__.iteritems():
336 if key not in shallow:
337 setattr(new_unit, key, copy.deepcopy(value))
338
339 for key in set(shallow):
340 setattr(new_unit, key, getattr(self, key))
341
342
343 memo[id(self)] = self
344
345 return new_unit
346
348 return copy.deepcopy(self)
349
355
362
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    @param otherpo: the unit to merge from; if it is not a pounit the
        merge is delegated to the parent class
    @param overwrite: take otherpo's target even if this unit is translated
    @param comments: whether comment lists are merged at all
    @param authoritative: when True, automatic, msgid and source comments
        of otherpo are not merged in
    """

    def mergelists(list1, list2, split=False):
        """Extend list1 in place with the entries of list2 it lacks."""
        # decode where necessary: if either list holds a unicode item,
        # byte strings in both lists are decoded as UTF-8
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for position, item in enumerate(list1):
                if isinstance(item, str):
                    list1[position] = item.decode("utf-8")
            for position, item in enumerate(list2):
                if isinstance(item, str):
                    list2[position] = item.decode("utf-8")

        # Determine the newline style of list1 from its first entry
        lineend = ""
        if list1 and list1[0]:
            # NOTE(review): "\n\r" looks like it was meant to be "\r\n" - confirm
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
            # no-op: lineend is already "" when nothing matched
            if not lineend:
                lineend = ""
        else:
            lineend = "\n"

        # Split if directed to do so
        if split:
            splitlist1 = []
            splitlist2 = []
            prefix = "#"
            # The first whitespace-separated token of each entry is taken as
            # the prefix; the remaining tokens are the individual items.
            for item in list1:
                splitlist1.extend(item.split()[1:])
                prefix = item.split()[0]
            for item in list2:
                splitlist2.extend(item.split()[1:])
                prefix = item.split()[0]
            list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
        else:
            # Normal merge, but conform to list1 newline style
            if list1 != list2:
                for item in list2:
                    if lineend:
                        item = item.rstrip() + lineend
                    # avoid duplicate lines, but always add entries shorter
                    # than 5 characters
                    if item not in list1 or len(item) < 5:
                        list1.append(item)
    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return
    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            # When authoritative, otherpo's automatic, msgid and source
            # comments are not brought across.
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
    if not self.istranslated() or overwrite:
        # Remove the "_: ...\n" msgid comment from the translation (if any).
        # Note that this modifies otherpo.target as well.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
        self.target = otherpo.target
        # A differing source or context makes the result fuzzy; otherwise
        # the fuzzy state of otherpo is taken over.
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        if self.source != otherpo.source:
            self.markfuzzy()
    else:
        # Both are translated and overwrite is off: a differing target
        # marks this unit fuzzy.
        if self.target != otherpo.target:
            self.markfuzzy()
439
441
442
443 return (is_null(self.msgid)
444 and not is_null(self.msgstr)
445 and self.msgidcomments == []
446 and is_null(self.msgctxt)
447 )
448
450 if self.isheader() or len(self.msgidcomments):
451 return False
452 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
453 return True
454 return False
455
456
457
458
463
471
485
491
499
502
508
511
513 """Makes this unit obsolete"""
514 self.obsolete = True
515 if self.msgctxt:
516 self.obsoletemsgctxt = self.msgctxt
517 if self.msgid:
518 self.obsoletemsgid = self.msgid
519 self.msgid = []
520 if self.msgidcomments:
521 self.obsoletemsgidcomments = self.msgidcomments
522 self.msgidcomments = []
523 if self.msgid_plural:
524 self.obsoletemsgid_plural = self.msgid_plural
525 self.msgid_plural = []
526 if self.msgstr:
527 self.obsoletemsgstr = self.msgstr
528 self.msgstr = []
529 self.sourcecomments = []
530 self.automaticcomments = []
531
def resurrect(self):
    """Makes an obsolete unit normal

    Each non-empty obsolete message part is moved back into its regular
    attribute and the obsolete copy is emptied.
    """
    self.obsolete = False
    if self.obsoletemsgctxt:
        # restore the context into msgctxt (it used to be written to msgid)
        self.msgctxt = self.obsoletemsgctxt
        self.obsoletemsgctxt = []
    if self.obsoletemsgid:
        self.msgid = self.obsoletemsgid
        self.obsoletemsgid = []
    if self.obsoletemsgidcomments:
        self.msgidcomments = self.obsoletemsgidcomments
        self.obsoletemsgidcomments = []
    if self.obsoletemsgid_plural:
        self.msgid_plural = self.obsoletemsgid_plural
        self.obsoletemsgid_plural = []
    if self.obsoletemsgstr:
        self.msgstr = self.obsoletemsgstr
        # clear the real attribute (a misspelt name used to be assigned)
        self.obsoletemsgstr = []
550
def hasplural(self):
    """returns whether this pounit contains plural strings..."""
    plural_lines = self.msgid_plural
    return len(plural_lines) != 0
554
557
559 if isinstance(partlines, dict):
560 partkeys = partlines.keys()
561 partkeys.sort()
562 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
563 partstr = partname + " "
564 partstartline = 0
565 if len(partlines) > 0 and len(partcomments) == 0:
566 partstr += partlines[0]
567 partstartline = 1
568 elif len(partcomments) > 0:
569 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
570
571 partstr += partlines[0] + '\n'
572
573 if len(partlines) > 1:
574 partstartline += 1
575 else:
576
577 partstr += '""\n'
578
579 if len(partcomments) > 1:
580 combinedcomment = []
581 for comment in partcomments:
582 comment = unquotefrompo([comment])
583 if comment.startswith("_:"):
584 comment = comment[len("_:"):]
585 if comment.endswith("\\n"):
586 comment = comment[:-len("\\n")]
587
588 combinedcomment.append(comment)
589 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
590
591 partstr += "\n".join(partcomments)
592 partstr = quote.rstripeol(partstr)
593 else:
594 partstr += '""'
595 partstr += '\n'
596
597 for partline in partlines[partstartline:]:
598 partstr += partline + '\n'
599 return partstr
600
602 """encodes unicode strings and returns other strings unchanged"""
603 if isinstance(output, unicode):
604 encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
605 return output.encode(encoding)
606 return output
607
609 """convert to a string. double check that unicode is handled somehow here"""
610 output = self._getoutput()
611 return self._encodeifneccessary(output)
612
614 """return this po element as a string"""
615 def add_prev_msgid_lines(lines, prefix, header, var):
616 if len(var) > 0:
617 lines.append("%s %s %s\n" % (prefix, header, var[0]))
618 lines.extend("%s %s\n" % (prefix, line) for line in var[1:])
619
620 def add_prev_msgid_info(lines, prefix):
621 add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
622 add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
623 add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)
624
625 lines = []
626 lines.extend(self.othercomments)
627 if self.isobsolete():
628 lines.extend(self.typecomments)
629 obsoletelines = []
630 add_prev_msgid_info(obsoletelines, prefix="#~|")
631 if self.obsoletemsgctxt:
632 obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
633 obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
634 if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
635 obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
636 obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
637 for index, obsoleteline in enumerate(obsoletelines):
638
639 obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
640 lines.extend(obsoletelines)
641 return u"".join(lines)
642
643
644 if is_null(self.msgid):
645 if not (self.isheader() or self.getcontext() or self.sourcecomments):
646 return u"".join(lines)
647 lines.extend(self.automaticcomments)
648 lines.extend(self.sourcecomments)
649 lines.extend(self.typecomments)
650 add_prev_msgid_info(lines, prefix="#|")
651 if self.msgctxt:
652 lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
653 lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
654 if self.msgid_plural or self.msgid_pluralcomments:
655 lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
656 lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
657 postr = u"".join(lines)
658 return postr
659
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit

    @rtype: List
    @return: A list of the locations with '#: ' stripped
    """
    raw_locations = []
    for comment in self.sourcecomments:
        # drop the end of line and the 3-character '#: ' marker, then split
        raw_locations.extend(quote.rstripeol(comment)[3:].split())
    return [urllib.unquote_plus(location) for location in raw_locations]
673
675 """Add a location to sourcecomments in the PO unit
676
677 @param location: Text location e.g. 'file.c:23' does not include #:
678 @type location: String
679
680 """
681 if location.find(" ") != -1:
682 location = urllib.quote_plus(location)
683 self.sourcecomments.append("#: %s\n" % location)
684
695
701
702 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
703
def getcontext(self):
    """Get the message context.

    @return: the unquoted msgctxt followed by the extracted msgid comment
    """
    context = unquotefrompo(self.msgctxt)
    msgid_comment = self._extract_msgidcomments()
    return context + msgid_comment
707
def setcontext(self, context):
    """Set the message context, stored as quoted lines in msgctxt.

    @param context: the unescaped context text
    """
    self.msgctxt = quoteforpo(data.forceunicode(context))
711
def getid(self):
    """Returns a unique identifier for this unit.

    The identifier is the source, prefixed with the context: in the
    "_: context" newline form when the unit has msgid comments, otherwise
    joined to the source by the \\04 character when a context exists.
    """
    context = self.getcontext()
    source = self.source
    if self.msgidcomments:
        return u"_: %s\n%s" % (context, source)
    if context:
        return u"%s\04%s" % (context, source)
    return source
726
727
728 -class pofile(pocommon.pofile):
729 """A .po file containing various units"""
730 UnitClass = pounit
731
733 """Parses the given file or file source string."""
734 try:
735 if hasattr(input, 'name'):
736 self.filename = input.name
737 elif not getattr(self, 'filename', ''):
738 self.filename = ''
739 if isinstance(input, str):
740 input = cStringIO.StringIO(input)
741
742 self.units = []
743 poparser.parse_units(poparser.ParseState(input, pounit), self)
744 except Exception, e:
745 raise base.ParseError(e)
746
748 """Make sure each msgid is unique ; merge comments etc from duplicates into original"""
749
750
751 id_dict = {}
752 uniqueunits = []
753
754
755 markedpos = []
756 def addcomment(thepo):
757 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
758 markedpos.append(thepo)
759 for thepo in self.units:
760 id = thepo.getid()
761 if thepo.isheader() and not thepo.getlocations():
762
763 uniqueunits.append(thepo)
764 elif id in id_dict:
765 if duplicatestyle == "merge":
766 if id:
767 id_dict[id].merge(thepo)
768 else:
769 addcomment(thepo)
770 uniqueunits.append(thepo)
771 elif duplicatestyle == "msgctxt":
772 origpo = id_dict[id]
773 if origpo not in markedpos:
774 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
775 markedpos.append(thepo)
776 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
777 uniqueunits.append(thepo)
778 else:
779 if not id:
780 if duplicatestyle == "merge":
781 addcomment(thepo)
782 else:
783 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
784 id_dict[id] = thepo
785 uniqueunits.append(thepo)
786 self.units = uniqueunits
787
789 """Convert to a string. double check that unicode is handled somehow here"""
790 output = self._getoutput()
791 if isinstance(output, unicode):
792 return output.encode(getattr(self, "_encoding", "UTF-8"))
793 return output
794
796 """convert the units back to lines"""
797 lines = []
798 for unit in self.units:
799 unitsrc = unit._getoutput() + u"\n"
800 lines.append(unitsrc)
801 lines = u"".join(lines).rstrip()
802
803 if lines:
804 lines += u"\n"
805 return lines
806
808 """encode any unicode strings in lines in self._encoding"""
809 newlines = []
810 encoding = self._encoding
811 if encoding is None or encoding.lower() == "charset":
812 encoding = 'UTF-8'
813 for line in lines:
814 if isinstance(line, unicode):
815 line = line.encode(encoding)
816 newlines.append(line)
817 return newlines
818
820 """decode any non-unicode strings in lines with self._encoding"""
821 newlines = []
822 for line in lines:
823 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
824 try:
825 line = line.decode(self._encoding)
826 except UnicodeError, e:
827 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
828 newlines.append(line)
829 return newlines
830
832 for unit in self.units:
833 if not (unit.isheader() or unit.isobsolete()):
834 yield unit
835