1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base
30 import re
31 import copy
32 import cStringIO
33 import poparser
34
35 lsep = "\n#: "
"""Separator for #: entries"""
37
38
39
# Escape sequences as written in a .po file, mapped to the characters they
# stand for.
po_unescape_map = {
    "\\r": "\r",
    "\\t": "\t",
    '\\"': '"',
    '\\n': '\n',
    '\\\\': '\\',
}
# The inverse table: raw character -> escape sequence.
po_escape_map = dict((raw, escaped) for (escaped, raw) in po_unescape_map.items())
42
def escapeforpo(line):
    """Escape one line of text for output in a .po file.

    Every character that has an entry in po_escape_map is replaced by its
    escape sequence. Callers are expected to pass a single line, i.e. text
    without an embedded line break.

    @param line: unescaped text
    @return: the escaped text
    """
    # Collect the distinct positions of all characters that need escaping.
    positions = set()
    for special in po_escape_map:
        positions.update(quote.find_all(line, special))
    pieces = []
    copied_upto = 0
    for position in sorted(positions):
        # Untouched text up to the special character, then its escape.
        pieces.append(line[copied_upto:position])
        pieces.append(po_escape_map[line[position:position + 1]])
        copied_upto = position + 1
    pieces.append(line[copied_upto:])
    return "".join(pieces)
61
65
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without dropping or replacing any
    whitespace. A space that ends up at the start of a continuation line is
    moved to the end of the preceding line, so joining the returned pieces
    gives back the original text.

    @param line: the text to wrap
    @return: list of line pieces
    """
    pieces = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
    for position in range(1, len(pieces)):
        current = pieces[position]
        if current[:1] == ' ':
            # Keep the break-space with the line before the break.
            pieces[position] = current[1:]
            pieces[position - 1] += ' '
    return pieces
80
def quoteforpo(text):
    """Quote and escape text for a PO file.

    The text is split on newlines; each line is wrapped with wrapline() and
    escaped with escapeforpo(). Multi-line text and single lines longer than
    71 characters are preceded by an empty '""' leader line, except for a
    single line that merely ends in a newline.

    @param text: the unescaped text, or None
    @return: list of quoted lines (empty for None)
    """
    if text is None:
        return []
    quoted = []
    segments = text.split("\n")
    if len(segments) > 1 or len(segments[0]) > 71:
        # Exactly two segments with an empty second one means "one line
        # ending in a newline": that form gets no leader line.
        if len(segments) != 2 or segments[1]:
            quoted.append('""')
    for segment in segments[:-1]:
        wrapped = wrapline(segment)
        if not wrapped:
            # An empty line still has to contribute its line break.
            quoted.append('"\\n"')
            continue
        for part in wrapped[:-1]:
            quoted.append('"' + escapeforpo(part) + '"')
        if wrapped[-1]:
            quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
    if segments[-1]:
        for part in wrapline(segments[-1]):
            quoted.append('"' + escapeforpo(part) + '"')
    return quoted
103
def extractpoline(line):
    """Strip the surrounding quotes from a po file line and unescape it.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of quote.extractwithoutquotes
    """
    return quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
111
114
def encodingToUse(encoding):
    """Return an encoding name that can be used, defaulting to UTF-8.

    None and the template placeholder "CHARSET" (in any letter case, matching
    the checks in pofile.encode and pofile.decode) mean that no real encoding
    was declared, so 'utf-8' is returned. Any other value is returned
    unchanged; it is not checked against the codecs known to the runtime.

    @param encoding: the declared encoding name, or None
    @return: the encoding name to use
    """
    if encoding is None:
        return 'utf-8'
    if hasattr(encoding, "lower") and encoding.lower() == "charset":
        return 'utf-8'
    return encoding
121
122
123
124
125
126
127
128
def is_null(lst):
    """Tell whether a list of quoted po lines holds no text.

    @param lst: list of quoted lines
    @return: True for an empty list or a list holding only '""'
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
131
def extractstr(string):
    """Return the double-quoted part of string, closing it if necessary.

    The result runs from the first double quote to the last one. When only
    an opening quote is present, the rest of the string is returned with a
    closing quote appended. A string without any quote yields '""'.

    @param string: text containing a (possibly unterminated) quoted string
    @return: the quoted string, including its quotes
    """
    left = string.find('"')
    if left == -1:
        # Nothing is quoted at all.
        return '""'
    right = string.rfind('"')
    if right > left:
        return string[left:right + 1]
    # Only one quote character: treat it as an unterminated opening quote.
    return string[left:] + '"'
139
140 -class pounit(pocommon.pounit):
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155 __shallow__ = ['_store']
156
def __init__(self, source=None, encoding="UTF-8"):
    """Create a unit with empty message parts and hand source to the base class.

    @param source: passed on to pocommon.pounit.__init__
    @param encoding: encoding name, normalised with encodingToUse()
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Every message part starts out as its own empty list of quoted lines.
    for part in ("prev_msgctxt", "prev_msgid", "prev_msgid_plural",
                 "msgctxt", "msgid", "msgid_pluralcomments",
                 "msgid_plural", "msgstr",
                 "obsoletemsgctxt", "obsoletemsgid",
                 "obsoletemsgid_pluralcomments", "obsoletemsgid_plural",
                 "obsoletemsgstr"):
        setattr(self, part, [])
    pocommon.pounit.__init__(self, source)
175
185
193
194 allcomments = property(_get_all_comments)
195
204
222
226
def setsource(self, source):
    """Set msgid and msgid_plural from the given (unescaped) value.

    @param source: an unescaped source string.
    """
    msgid, msgid_plural = self._set_source_vars(source)
    self.msgid = msgid
    self.msgid_plural = msgid_plural
233 source = property(getsource, setsource)
234
def _get_prev_source(self):
    """Return the unescaped previous msgid.

    Built by _get_source_vars from prev_msgid and prev_msgid_plural.
    """
    previous_singular = self.prev_msgid
    previous_plural = self.prev_msgid_plural
    return self._get_source_vars(previous_singular, previous_plural)
238
def _set_prev_source(self, source):
    """Set prev_msgid and prev_msgid_plural from the given (unescaped) value.

    @param source: an unescaped source string.
    """
    prev_msgid, prev_msgid_plural = self._set_source_vars(source)
    self.prev_msgid = prev_msgid
    self.prev_msgid_plural = prev_msgid_plural
245 prev_source = property(_get_prev_source, _set_prev_source)
246
254
def settarget(self, target):
    """Set the msgstr to the given (unescaped) value.

    For a unit with a plural, a multistring or plain string is turned into a
    list of forms and msgstr becomes a dict of quoted forms. For a unit
    without a plural, a one-element list or dict is reduced to its single
    form.

    @param target: a string, a multistring, or a list/dict of plural forms
    @raise ValueError: if the unit has no plural but target holds a number of
        forms other than one
    """
    self._rich_target = None
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if self.hasplural():
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) != 1:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, dict):
            # The single form may be stored under any key, not only 0.
            target = list(target.values())[0]
        else:
            target = target[0]
    if isinstance(target, list):
        self.msgstr = dict([(index, quoteforpo(form)) for index, form in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(key, quoteforpo(form)) for key, form in target.items()])
    else:
        self.msgstr = quoteforpo(target)
279 target = property(gettarget, settarget)
280
def getnotes(self, origin=None):
    """Return the unit's comments, selected by origin.

    Translator comments come from othercomments with their first two
    characters removed; developer comments come from automaticcomments with
    their first three characters removed. The last character of the combined
    text is dropped.

    @param origin: None for both kinds, "translator", or one of
        "programmer", "developer", "source code"
    @raise ValueError: for any other origin
    """
    if origin is None:
        parts = [comment[2:] for comment in self.othercomments]
        parts.extend([comment[3:] for comment in self.automaticcomments])
    elif origin == "translator":
        parts = [comment[2:] for comment in self.othercomments]
    elif origin in ["programmer", "developer", "source code"]:
        parts = [comment[3:] for comment in self.automaticcomments]
    else:
        raise ValueError("Comment type not valid")
    return u"".join(parts)[:-1]
294
def addnote(self, text, origin=None, position="append"):
    """Add a note (comment) to the unit.

    This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note; empty or whitespace-only text is ignored
    @param origin: "programmer", "developer" or "source code" adds a "#. "
        comment to automaticcomments; anything else adds a "# " comment to
        othercomments
    @param position: "append" adds after the existing comments, any other
        value puts the new lines in front of them
    """
    # Ignore empty strings and strings without non-space characters.
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)
    # Always bound, so the prepend branch below works for translator notes.
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "
    newcomments = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        # Extends the unit's own list in place.
        commentlist.extend(newcomments)
    elif autocomments:
        self.automaticcomments = newcomments + commentlist
    else:
        self.othercomments = newcomments + commentlist
317
def removenotes(self):
    """Drop all translator notes by rebinding othercomments to a new empty list."""
    self.othercomments = list()
321
323
def __deepcopy__(self, memo=None):
    """Return a copy of this unit for copy.deepcopy().

    Attributes named in __shallow__ are shared with the copy; every other
    instance attribute is deep-copied.

    @param memo: the memo dictionary of the running deepcopy operation
    @return: the new unit
    """
    if memo is None:
        memo = {}
    # Make an instance to serve as the copy and register it straight away,
    # so that references back to this unit resolve to the copy.
    new_unit = self.__class__()
    memo[id(self)] = new_unit
    shallow = set(self.__shallow__)
    # Deep-copy all members which are not in shallow, sharing the memo so
    # that objects referenced more than once are copied only once.
    for key, value in self.__dict__.items():
        if key not in shallow:
            setattr(new_unit, key, copy.deepcopy(value, memo))
    # Members listed in shallow are shared by reference.
    for key in shallow:
        setattr(new_unit, key, getattr(self, key))
    return new_unit
340
def copy(self):
    """Return a deep copy of this unit, made with copy.deepcopy()."""
    duplicate = copy.deepcopy(self)
    return duplicate
343
349
def _msgstrlen(self):
    """Return the length of the unquoted, stripped msgstr.

    For a plural msgstr (a dict) the stripped forms are joined with
    newlines and the joined text is stripped again before measuring.
    """
    msgstr = self.msgstr
    if not isinstance(msgstr, dict):
        return len(unquotefrompo(msgstr).strip())
    stripped_forms = [unquotefrompo(form).strip() for form in msgstr.values()]
    return len("\n".join(stripped_forms).strip())
356
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    @param otherpo: the unit to merge from; if it is not a pounit the merge
        is delegated to the base class
    @param overwrite: replace the target even if this unit is translated
    @param comments: merge the comment lists as well
    @param authoritative: when True, automatic comments, msgid comments and
        source comments are not taken over from otherpo
    """

    def mergelists(list1, list2, split=False):
        """Merge the lines of list2 into list1 (list1 is changed in place;
        items of both lists may be decoded in place)."""
        # Decode where necessary: if either list holds a unicode item, all
        # byte strings in both lists are decoded as UTF-8.
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for position, item in enumerate(list1):
                if isinstance(item, str):
                    list1[position] = item.decode("utf-8")
            for position, item in enumerate(list2):
                if isinstance(item, str):
                    list2[position] = item.decode("utf-8")

        # Determine the newline style from the first line of list1; an empty
        # list1 (or empty first line) defaults to "\n".
        lineend = ""
        if list1 and list1[0]:
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
            if not lineend:
                lineend = ""
        else:
            lineend = "\n"

        # Split if directed to do so: compare whitespace-separated entries
        # (everything after the first word of each line) instead of lines.
        if split:
            splitlist1 = []
            splitlist2 = []
            prefix = "#"
            for item in list1:
                splitlist1.extend(item.split()[1:])
                prefix = item.split()[0]
            for item in list2:
                splitlist2.extend(item.split()[1:])
                prefix = item.split()[0]
            # Each entry missing from list1 becomes a line of its own,
            # using the last prefix seen.
            list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
        else:
            # Normal merge, but conform to the newline style of list1.
            if list1 != list2:
                for item in list2:
                    if lineend:
                        item = item.rstrip() + lineend
                    # Skip lines already present, but always add lines
                    # shorter than five characters.
                    if item not in list1 or len(item) < 5:
                        list1.append(item)
    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return
    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            # An authoritative unit keeps its own automatic, msgid and
            # source comments.
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
    if not self.istranslated() or overwrite:
        # Strip a "_: ...\n" comment from the other target before taking it
        # over. Note that this modifies otherpo as well.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
        self.target = otherpo.target
        # A differing source or context makes the result fuzzy; otherwise
        # the fuzzy state of otherpo is taken over.
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        if self.source != otherpo.source:
            self.markfuzzy()
    else:
        # Both units are translated and this one is kept: flag a conflict.
        if self.target != otherpo.target:
            self.markfuzzy()
433
435
436
def isheader(self):
    """Tell whether this unit looks like a header.

    That is: empty msgid, non-empty msgstr, no msgid comments and an empty
    msgctxt.
    """
    if not is_null(self.msgid):
        return False
    if is_null(self.msgstr):
        return False
    if self.msgidcomments != []:
        return False
    return is_null(self.msgctxt)
442
def isblank(self):
    """Tell whether the unit carries no message at all.

    Headers and units with msgid comments are never blank; otherwise the
    unit is blank when msgid and msgstr have zero length and msgctxt is empty.
    """
    if self.isheader() or len(self.msgidcomments):
        return False
    no_source = self._msgidlen() == 0
    return no_source and self._msgstrlen() == 0 and is_null(self.msgctxt)
449
450
451
452
457
465
475
478
481
484
def makeobsolete(self):
    """Mark the unit obsolete and move its message parts to the obsolete fields.

    Non-empty msgid, msgidcomments, msgid_plural and msgstr are moved to
    their obsolete* counterparts and reset to empty lists. Source and
    automatic comments are discarded.
    """
    self.obsolete = True
    # NOTE(review): msgctxt is copied but, unlike the other parts, not
    # cleared afterwards - confirm that this is intended.
    if self.msgctxt:
        self.obsoletemsgctxt = self.msgctxt
    moves = (
        ("msgid", "obsoletemsgid"),
        ("msgidcomments", "obsoletemsgidcomments"),
        ("msgid_plural", "obsoletemsgid_plural"),
        ("msgstr", "obsoletemsgstr"),
    )
    for live_name, obsolete_name in moves:
        value = getattr(self, live_name)
        if value:
            setattr(self, obsolete_name, value)
            setattr(self, live_name, [])
    self.sourcecomments = []
    self.automaticcomments = []
504
def resurrect(self):
    """Make an obsolete unit normal again.

    Every non-empty obsolete* message part is moved back to its live
    attribute and the obsolete attribute is reset to an empty list.
    """
    self.obsolete = False
    moves = (
        # The context goes back to msgctxt, not to msgid.
        ("obsoletemsgctxt", "msgctxt"),
        ("obsoletemsgid", "msgid"),
        ("obsoletemsgidcomments", "msgidcomments"),
        ("obsoletemsgid_plural", "msgid_plural"),
        ("obsoletemsgstr", "msgstr"),
    )
    for obsolete_name, live_name in moves:
        value = getattr(self, obsolete_name)
        if value:
            setattr(self, live_name, value)
            setattr(self, obsolete_name, [])
523
def hasplural(self):
    """Tell whether this pounit has a plural source, i.e. msgid_plural is not empty."""
    plural_lines = self.msgid_plural
    return len(plural_lines) > 0
527
530
def _getmsgpartstr(self, partname, partlines, partcomments=""):
    """Render one message part (e.g. msgid or msgstr) as PO text.

    @param partname: the keyword to put in front, e.g. "msgid"
    @param partlines: list of quoted lines, or a dict of such lists for
        plural forms (rendered as partname[key], in key order)
    @param partcomments: quoted comment lines written before the text
    @return: the rendered part, ending in a newline
    """
    if isinstance(partlines, dict):
        return "".join([self._getmsgpartstr("%s[%d]" % (partname, key), partlines[key], partcomments) for key in sorted(partlines)])
    output = partname + " "
    first_unwritten = 0
    if len(partcomments) > 0:
        if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
            # A blank leader line must come before the comment ...
            output += partlines[0] + '\n'
            # ... but if it is the only line, it is written again below.
            if len(partlines) > 1:
                first_unwritten += 1
        else:
            # Comments always start on a line of their own.
            output += '""\n'
        if len(partcomments) > 1:
            # Combine several comments into a single "_:" comment.
            combined = []
            for comment in partcomments:
                comment = unquotefrompo([comment])
                if comment.startswith("_:"):
                    comment = comment[len("_:"):]
                if comment.endswith("\\n"):
                    comment = comment[:-len("\\n")]
                combined.append(comment)
            partcomments = quoteforpo("_:%s" % "".join(combined))
        output += "\n".join(partcomments)
        output = quote.rstripeol(output)
    elif len(partlines) > 0:
        # No comments: the first line follows the keyword directly.
        output += partlines[0]
        first_unwritten = 1
    else:
        output += '""'
    output += '\n'
    # Add the remaining lines.
    for line in partlines[first_unwritten:]:
        output += line + '\n'
    return output
573
def _encodeifneccessary(self, output):
    """Encode unicode strings and return other values unchanged.

    The encoding is the unit's _encoding as set by __init__ (an explicitly
    set "encoding" attribute still takes precedence), normalised with
    encodingToUse().

    @param output: the text to encode
    @return: a byte string for unicode input, otherwise output itself
    """
    if isinstance(output, unicode):
        declared = getattr(self, "encoding", None) or getattr(self, "_encoding", None)
        return output.encode(encodingToUse(declared))
    return output
580
def __str__(self):
    """Return the unit in PO format, passed through _encodeifneccessary()."""
    return self._encodeifneccessary(self._getoutput())
585
def _getoutput(self):
    """Return this po element as a string.

    An obsolete unit is written from its obsolete* parts with "#~ "
    markers. A unit without msgid is reduced to its plain comments, unless
    it is a header or has a context or source comments.
    """
    def previous_lines(keyword, quoted):
        """Render a previous-message part as "#| " comment lines."""
        if len(quoted) == 0:
            return []
        rendered = ["#| %s %s\n" % (keyword, quoted[0])]
        rendered.extend(["#| %s\n" % line for line in quoted[1:]])
        return rendered

    lines = list(self.othercomments)
    if self.isobsolete():
        lines.extend(self.typecomments)
        parts = []
        if self.obsoletemsgctxt:
            parts.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
        parts.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
        if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
            parts.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
        parts.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
        # Continuation lines of a multi-line part need the marker as well.
        lines.extend([part.replace('\n"', '\n#~ "') for part in parts])
        return "".join([self._encodeifneccessary(line) for line in lines])
    # Without a msgid only the plain comments are written, unless this is
    # the header or the unit has a context or source comments.
    if is_null(self.msgid) and not (self.isheader() or self.getcontext() or self.sourcecomments):
        return "".join(lines)
    lines.extend(self.automaticcomments)
    lines.extend(self.sourcecomments)
    lines.extend(self.typecomments)
    lines.extend(previous_lines('msgctxt', self.prev_msgctxt))
    lines.extend(previous_lines('msgid', self.prev_msgid))
    lines.extend(previous_lines('msgid_plural', self.prev_msgid_plural))
    if self.msgctxt:
        lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
    lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
    if self.msgid_plural or self.msgid_pluralcomments:
        lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
    lines.append(self._getmsgpartstr("msgstr", self.msgstr))
    return "".join([self._encodeifneccessary(line) for line in lines])
633
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit

    Each source comment loses its line ending and its first three
    characters and is then split on whitespace.

    rtype: List
    return: A list of the locations with '#: ' stripped
    """
    locations = []
    for comment in self.sourcecomments:
        locations.extend(quote.rstripeol(comment)[3:].split())
    return locations
645
def addlocation(self, location):
    """Append a location to sourcecomments as a new "#: " line.

    @param location: Text location e.g. 'file.c:23' does not include #:
    @type location: String
    """
    comment_line = "#: %s\n" % location
    self.sourcecomments.append(comment_line)
654
665
671
672 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
673
def getcontext(self):
    """Get the message context.

    This is the unquoted msgctxt followed by the result of
    _extract_msgidcomments().
    """
    msgctxt_text = unquotefrompo(self.msgctxt)
    msgid_comment = self._extract_msgidcomments()
    return msgctxt_text + msgid_comment
677
def getid(self):
    """Return a unique identifier for this unit.

    The identifier is the source. With msgid comments it is prefixed with
    "_: <context>" and a newline; otherwise a non-empty context is joined
    to the source with the \\04 character.
    """
    context = self.getcontext()
    unit_id = self.source
    if self.msgidcomments:
        return u"_: %s\n%s" % (context, unit_id)
    if context:
        return u"%s\04%s" % (context, unit_id)
    return unit_id
692
693 -class pofile(pocommon.pofile):
694 """A .po file containing various units"""
695 UnitClass = pounit
696
def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
    """Construct a pofile, optionally reading in from inputfile.

    @param inputfile: if not None, handed to parse()
    @param encoding: encoding name, normalised with encodingToUse()
    @param unitclass: stored as UnitClass and passed to the base class
    """
    self.UnitClass = unitclass
    pocommon.pofile.__init__(self, unitclass=unitclass)
    self.units = []
    self.filename = ''
    self._encoding = encodingToUse(encoding)
    if inputfile is None:
        return
    self.parse(inputfile)
707
def changeencoding(self, newencoding):
    """Deprecated: used to change the encoding of the file.

    The method refuses to run. The header rewriting code that used to follow
    the raise statement could never execute and has been removed.

    @param newencoding: ignored
    @raise DeprecationWarning: always
    """
    raise DeprecationWarning("pofile.changeencoding() is deprecated")
744
746 """Parses the given file or file source string."""
747 try:
748 if hasattr(input, 'name'):
749 self.filename = input.name
750 elif not getattr(self, 'filename', ''):
751 self.filename = ''
752 if isinstance(input, str):
753 input = cStringIO.StringIO(input)
754 poparser.parse_units(poparser.ParseState(input, pounit), self)
755 except Exception, e:
756 raise base.ParseError(e)
757
def removeduplicates(self, duplicatestyle="merge"):
    """Make sure each msgid is unique; merge comments etc from duplicates into original

    @param duplicatestyle: "merge" merges a duplicate into the first unit
        with the same id; "msgctxt" keeps both and gives each its locations
        as msgctxt. Units with an empty id are kept and marked with their
        locations.
    """
    id_dict = {}
    uniqueunits = []
    # Identities of the original units that already received a msgctxt.
    # Identity is used on purpose: duplicates may compare equal.
    marked_ids = set()

    def addcomment(thepo):
        """Distinguish a unit by a "_:" msgid comment listing its locations."""
        thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))

    def addcontext(thepo):
        """Distinguish a unit by appending its locations to msgctxt."""
        thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))

    for thepo in self.units:
        id = thepo.getid()
        if thepo.isheader() and not thepo.getlocations():
            # Headers without locations are never merged.
            uniqueunits.append(thepo)
        elif id in id_dict:
            if duplicatestyle == "merge":
                if id:
                    id_dict[id].merge(thepo)
                else:
                    addcomment(thepo)
                    uniqueunits.append(thepo)
            elif duplicatestyle == "msgctxt":
                origpo = id_dict[id]
                if builtin_id(origpo) not in marked_ids:
                    # Give the original its context exactly once, however
                    # many duplicates follow.
                    addcontext(origpo)
                    marked_ids.add(builtin_id(origpo))
                addcontext(thepo)
                uniqueunits.append(thepo)
        else:
            if not id:
                if duplicatestyle == "merge":
                    addcomment(thepo)
                else:
                    addcontext(thepo)
            id_dict[id] = thepo
            uniqueunits.append(thepo)
    self.units = uniqueunits


# The loop variable above is called "id", as in the original code, so the
# builtin is kept reachable under another name.
builtin_id = id
798
def __str__(self):
    """Convert the store to a byte string.

    Unicode output from _getoutput() is encoded with the store's _encoding
    as set by __init__ (an explicitly set "encoding" attribute still takes
    precedence), normalised with encodingToUse(). Other output is returned
    unchanged.
    """
    output = self._getoutput()
    if isinstance(output, unicode):
        declared = getattr(self, "encoding", None) or getattr(self, "_encoding", None)
        return output.encode(encodingToUse(declared))
    return output
805
def _getoutput(self):
    """Convert the units back to text.

    Each unit is rendered with str() and followed by a newline. The pieces
    are passed through encode() and joined, trailing whitespace is removed,
    and non-empty output ends in exactly one newline.
    """
    rendered = [str(unit) + "\n" for unit in self.units]
    text = "".join(self.encode(rendered)).rstrip()
    if text:
        text += "\n"
    return text
817
def encode(self, lines):
    """Encode any unicode strings in lines in self._encoding.

    A missing encoding or the "charset" placeholder (any letter case)
    falls back to UTF-8. Items that are not unicode are passed through.

    @param lines: iterable of strings
    @return: a new list with the unicode items encoded
    """
    encoding = self._encoding
    if encoding is None or encoding.lower() == "charset":
        encoding = 'UTF-8'
    encoded = []
    for line in lines:
        if isinstance(line, unicode):
            encoded.append(line.encode(encoding))
        else:
            encoded.append(line)
    return encoded
829
831 """decode any non-unicode strings in lines with self._encoding"""
832 newlines = []
833 for line in lines:
834 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
835 try:
836 line = line.decode(self._encoding)
837 except UnicodeError, e:
838 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
839 newlines.append(line)
840 return newlines
841
846