1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Module for handling XLIFF files for translation.
22
23 The official recommendation is to use the extension .xlf for XLIFF files.
24 """
25
26 from lxml import etree
27
28 from translate.misc.multistring import multistring
29 from translate.storage import base, lisa
30 from translate.storage.lisa import getXMLspace
31 from translate.storage.placeables.lisa import xml_to_strelem, strelem_to_xml
32
33
34
35 ID_SEPARATOR = u"\04"
36
37
38
39
40
41 ID_SEPARATOR_SAFE = u"__%04__"
43 """A single term in the xliff file."""
44
45 rootNode = "trans-unit"
46 languageNode = "source"
47 textNode = ""
48 namespace = 'urn:oasis:names:tc:xliff:document:1.1'
49
50 _default_xml_space = "default"
51
52
53
def __init__(self, source, empty=False, **kwargs):
    """Construct the unit and flag non-empty units as xml:space="preserve".

    All arguments are handed to the parent constructor unchanged. When
    C{empty} is true the element is left exactly as the parent built it.
    """
    super(xliffunit, self).__init__(source, empty, **kwargs)
    if not empty:
        lisa.setXMLspace(self.xmlelement, "preserve")
60
62 """Returns an xml Element setup with given parameters."""
63
64
65
66
67 assert purpose
68 langset = etree.Element(self.namespaced(purpose))
69
70
71
72
73 langset.text = text
74 return langset
75
91
93 sourcelanguageNode = self.get_source_dom()
94 if sourcelanguageNode is None:
95 sourcelanguageNode = self.createlanguageNode(sourcelang, u'', "source")
96 self.set_source_dom(sourcelanguageNode)
97
98
99 for i in range(len(sourcelanguageNode)):
100 del sourcelanguageNode[0]
101 sourcelanguageNode.text = None
102
103 strelem_to_xml(sourcelanguageNode, value[0])
104
111 rich_source = property(get_rich_source, set_rich_source)
112
129
134 rich_target = property(get_rich_target, set_rich_target)
135
def addalttrans(self, txt, origin=None, lang=None, sourcetxt=None, matchquality=None):
    """Append an alt-trans element carrying an alternative translation.

    The new element is marked xml:space="preserve" and always receives a
    "target" child holding C{txt}.

    @type txt: String
    @param txt: Alternative translation of the source text.
    @param origin: If truthy, stored in the "origin" attribute.
    @param lang: If truthy, set as the language of the alt-trans element.
    @param sourcetxt: If truthy, stored in a "source" child that is added
        before the "target" child.
    @param matchquality: If truthy, stored in the "match-quality" attribute.
    """
    def _to_unicode(value):
        # Byte strings are decoded as UTF-8; anything else passes through.
        if isinstance(value, str):
            return value.decode("utf-8")
        return value

    txt = _to_unicode(txt)
    alttrans = etree.SubElement(self.xmlelement, self.namespaced("alt-trans"))
    lisa.setXMLspace(alttrans, "preserve")

    if sourcetxt:
        sourcetxt = _to_unicode(sourcetxt)
        altsource = etree.SubElement(alttrans, self.namespaced("source"))
        altsource.text = sourcetxt

    alttarget = etree.SubElement(alttrans, self.namespaced("target"))
    alttarget.text = txt

    # Optional attributes are only written when a truthy value was supplied.
    for attribute, value in (("match-quality", matchquality), ("origin", origin)):
        if value:
            alttrans.set(attribute, value)
    if lang:
        lisa.setXMLlang(alttrans, lang)
162
189
191 """Removes the supplied alternative from the list of alt-trans tags"""
192 self.xmlelement.remove(alternative.xmlelement)
193
def addnote(self, text, origin=None, position="append"):
    """Store C{text} in a new "note" child element of this unit.

    Any C{position} other than "append" first removes the existing notes
    matching C{origin}. The text is stripped of surrounding whitespace;
    if nothing remains, no note element is created. A truthy C{origin} is
    recorded in the note's "from" attribute.
    """
    if position != "append":
        self.removenotes(origin=origin)

    # Falsy input (None, empty string) is left untouched and rejected below.
    text = text and text.strip()
    if not text:
        return

    if isinstance(text, str):
        # Byte strings are decoded as UTF-8 before being stored.
        text = text.decode("utf-8")

    note = etree.SubElement(self.xmlelement, self.namespaced("note"))
    note.text = text
    if origin:
        note.set("from", origin)
209
211 """Private method that returns the text from notes matching 'origin' or all notes."""
212 notenodes = self.xmlelement.iterdescendants(self.namespaced("note"))
213
214
215
216 initial_list = [lisa.getText(note, getXMLspace(self.xmlelement, self._default_xml_space)) for note in notenodes if self.correctorigin(note, origin)]
217
218
219 dictset = {}
220 notelist = [dictset.setdefault(note, note) for note in initial_list if note not in dictset]
221
222 return notelist
223
226
228 """Remove all the translator notes."""
229 notes = self.xmlelement.iterdescendants(self.namespaced("note"))
230 for note in notes:
231 if self.correctorigin(note, origin=origin):
232 self.xmlelement.remove(note)
233
def adderror(self, errorname, errortext):
    """Record an error on this unit as a note with origin "pofilter".

    The note text is "errorname: errortext", or just C{errorname} when
    C{errortext} is empty.
    """
    if errortext:
        message = errorname + ': ' + errortext
    else:
        message = errorname
    self.addnote(message, origin="pofilter")
241
243 """Get all error messages."""
244
245 notelist = self.getnotelist(origin="pofilter")
246 errordict = {}
247 for note in notelist:
248 errorname, errortext = note.split(': ')
249 errordict[errorname] = errortext
250 return errordict
251
253 """States whether this unit is approved."""
254 return self.xmlelement.get("approved") == "yes"
255
257 """Mark this unit as approved."""
258 if value:
259 self.xmlelement.set("approved", "yes")
260 elif self.isapproved():
261 self.xmlelement.set("approved", "no")
262
264 """States whether this unit needs to be reviewed"""
265 targetnode = self.getlanguageNode(lang=None, index=1)
266 return not targetnode is None and \
267 "needs-review" in targetnode.get("state", "")
268
270 """Marks the unit to indicate whether it needs review. Adds an optional explanation as a note."""
271 targetnode = self.getlanguageNode(lang=None, index=1)
272 if not targetnode is None:
273 if needsreview:
274 targetnode.set("state", "needs-review-translation")
275 if explanation:
276 self.addnote(explanation, origin="translator")
277 else:
278 del targetnode.attrib["state"]
279
286
288 if value:
289 self.markapproved(False)
290 else:
291 self.markapproved(True)
292 targetnode = self.getlanguageNode(lang=None, index=1)
293 if not targetnode is None:
294 if value:
295 targetnode.set("state", "needs-review-translation")
296 else:
297 for attribute in ["state", "state-qualifier"]:
298 if attribute in targetnode.attrib:
299 del targetnode.attrib[attribute]
300
301 - def settarget(self, text, lang='xx', append=False):
306
307
308
309
310
311
312
313
315 value = self.xmlelement.get("translate")
316 if value and value.lower() == 'no':
317 return False
318 return True
319
321 targetnode = self.getlanguageNode(lang=None, index=1)
322 if targetnode is None:
323 return
324 if self.isfuzzy() and "state-qualifier" in targetnode.attrib:
325
326 del targetnode.attrib["state-qualifier"]
327 targetnode.set("state", "translated")
328
332
345
348
350 id_attr = self.xmlelement.get("id")
351 if id_attr:
352 return [id_attr]
353 return []
354
def createcontextgroup(self, name, contexts=None, purpose=None):
    """Attach a named context-group element to this unit's XML element.

    C{contexts} must be a non-empty list of (type, text) tuples; each tuple
    becomes a "context" child whose "context-type" attribute is the type
    and whose text is the text. A truthy C{purpose} is stored in the
    "purpose" attribute of the context-group.
    """
    assert contexts
    group = etree.Element(self.namespaced("context-group"))

    # Inside a "group" element the context-group is inserted first; in any
    # other element it is appended last.
    parent = self.xmlelement
    if parent.tag != self.namespaced("group"):
        parent.append(group)
    else:
        parent.insert(0, group)

    group.set("name", name)
    if purpose:
        group.set("purpose", purpose)

    for context_type, context_text in contexts:
        if isinstance(context_text, str):
            # Byte strings are decoded as UTF-8 before being stored.
            context_text = context_text.decode("utf-8")
        context = etree.SubElement(group, self.namespaced("context"))
        context.text = context_text
        context.set("context-type", context_type)
376
def getcontextgroups(self, name):
    """Collect the contexts of every context-group called C{name}.

    Returns a list with one entry per matching context-group, in document
    order; each entry is a list of (context-type, text) tuples for the
    "context" elements found below that group.
    """
    matches = []
    for group in self.xmlelement.iterdescendants(self.namespaced("context-group")):
        if group.get("name") != name:
            continue
        matches.append([
            (context.get("context-type"),
             lisa.getText(context, getXMLspace(self.xmlelement, self._default_xml_space)))
            for context in group.iterdescendants(self.namespaced("context"))
        ])
    return matches
390
392 """returns the restype attribute in the trans-unit tag"""
393 return self.xmlelement.get("restype")
394
395 - def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
406
408 """Check against node tag's origin (e.g note or alt-trans)"""
409 if origin == None:
410 return True
411 elif origin in node.get("from", ""):
412 return True
413 elif origin in node.get("origin", ""):
414 return True
415 else:
416 return False
417
419 """Override L{TranslationUnit.multistring_to_rich} which is used by the
420 C{rich_source} and C{rich_target} properties."""
421 strings = mstr
422 if isinstance(mstr, multistring):
423 strings = mstr.strings
424 elif isinstance(mstr, basestring):
425 strings = [mstr]
426
427 return [xml_to_strelem(s) for s in strings]
428 multistring_to_rich = classmethod(multistring_to_rich)
429
431 """Override L{TranslationUnit.rich_to_multistring} which is used by the
432 C{rich_source} and C{rich_target} properties."""
433 return multistring([unicode(elem) for elem in elem_list])
434 rich_to_multistring = classmethod(rich_to_multistring)
435
436
438 """Class representing a XLIFF file store."""
439 UnitClass = xliffunit
440 Name = _("XLIFF Translation File")
441 Mimetypes = ["application/x-xliff", "application/x-xliff+xml"]
442 Extensions = ["xlf", "xliff"]
443 rootNode = "xliff"
444 bodyNode = "body"
445 XMLskeleton = '''<?xml version="1.0" ?>
446 <xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
447 <file original='NoName' source-language='en' datatype='plaintext'>
448 <body>
449 </body>
450 </file>
451 </xliff>'''
452 namespace = 'urn:oasis:names:tc:xliff:document:1.1'
453 suggestions_in_format = True
454 """xliff units have alttrans tags which can be used to store suggestions"""
455
457 self._filename = None
458 lisa.LISAfile.__init__(self, *args, **kwargs)
459 self._messagenum = 0
460
def initbody(self):
    """Adopt the document's default namespace and select the working body.

    The namespace is read from the root element's un-prefixed nsmap entry.
    If a filename is already set, its file node is looked up (and created
    if missing); otherwise the first "file" child of the root is used. The
    body of that file node (created if missing) becomes C{self.body}.
    """
    self.namespace = self.document.getroot().nsmap.get(None, None)

    current = self._filename
    if not current:
        # No file chosen yet: fall back to the first file element.
        file_tag = self.namespaced('file')
        filenode = self.document.getroot().iterchildren(file_tag).next()
    else:
        filenode = self.getfilenode(current, createifmissing=True)

    self.body = self.getbodynode(filenode, createifmissing=True)
469
471 """Initialise the file header."""
472 pass
473
474 - def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
499
501 """returns the name of the given file"""
502 return filenode.get("original")
503
505 """set the name of the given file"""
506 return filenode.set("original", filename)
507
509 """returns all filenames in this XLIFF file"""
510 filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
511 filenames = [self.getfilename(filenode) for filenode in filenodes]
512 filenames = filter(None, filenames)
513 if len(filenames) == 1 and filenames[0] == '':
514 filenames = []
515 return filenames
516
def getfilenode(self, filename, createifmissing=False):
    """Return the "file" child of the root whose name equals C{filename}.

    When no such node exists, the result of C{createfilenode(filename)} is
    returned if C{createifmissing} is true, and None otherwise.
    """
    root = self.document.getroot()
    for candidate in root.iterchildren(self.namespaced("file")):
        if self.getfilename(candidate) == filename:
            return candidate
    if not createifmissing:
        return None
    return self.createfilenode(filename)
527
def getids(self, filename=None):
    """Return unit ids, optionally restricted to one file.

    Without C{filename} the parent implementation is used. With a filename,
    C{self.id_index} is rebuilt from the units whose id starts with
    C{filename + ID_SEPARATOR}, keyed by the id with that prefix removed,
    and the keys of that index are returned.
    """
    if filename:
        self.id_index = {}
        prefix = filename + ID_SEPARATOR
        skip = len(prefix)
        for unit in self.units:
            if unit.getid().startswith(prefix):
                self.id_index[unit.getid()[skip:]] = unit
        return self.id_index.keys()
    return super(xlifffile, self).getids()
538
540 if not language:
541 return
542 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
543 filenode.set("source-language", language)
544
546 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
547 return filenode.get("source-language")
548 sourcelanguage = property(getsourcelanguage, setsourcelanguage)
549
551 if not language:
552 return
553 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
554 filenode.set("target-language", language)
555
557 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
558 return filenode.get("target-language")
559 targetlanguage = property(gettargetlanguage, settargetlanguage)
560
562 """Returns the datatype of the stored file. If no filename is given,
563 the datatype of the first file is given."""
564 if filename:
565 node = self.getfilenode(filename)
566 if not node is None:
567 return node.get("datatype")
568 else:
569 filenames = self.getfilenames()
570 if len(filenames) > 0 and filenames[0] != "NoName":
571 return self.getdatatype(filenames[0])
572 return ""
573
575 """Returns the date attribute for the file. If no filename is given,
576 the date of the first file is given. If the date attribute is not
577 specified, None is returned."""
578 if filename:
579 node = self.getfilenode(filename)
580 if not node is None:
581 return node.get("date")
582 else:
583 filenames = self.getfilenames()
584 if len(filenames) > 0 and filenames[0] != "NoName":
585 return self.getdate(filenames[0])
586 return None
587
589 """We want to remove the default file-tag as soon as possible if we
590 know if still present and empty."""
591 filenodes = list(self.document.getroot().iterchildren(self.namespaced("file")))
592 if len(filenodes) > 1:
593 for filenode in filenodes:
594 if filenode.get("original") == "NoName" and \
595 not list(filenode.iterdescendants(self.namespaced(self.UnitClass.rootNode))):
596 self.document.getroot().remove(filenode)
597 break
598
600 """finds the header node for the given filenode"""
601
602 headernode = filenode.iterchildren(self.namespaced("header"))
603 try:
604 return headernode.next()
605 except StopIteration:
606 pass
607 if not createifmissing:
608 return None
609 headernode = etree.SubElement(filenode, self.namespaced("header"))
610 return headernode
611
def getbodynode(self, filenode, createifmissing=False):
    """Return the first "body" child of C{filenode}.

    When there is none, a new "body" sub-element is created and returned if
    C{createifmissing} is true; otherwise None is returned.
    """
    # Leaving the loop on the first iteration yields the first body child.
    for existing in filenode.iterchildren(self.namespaced("body")):
        return existing
    if createifmissing:
        return etree.SubElement(filenode, self.namespaced("body"))
    return None
623
def addsourceunit(self, source, filename="NoName", createifmissing=False):
    """Add a unit for C{source} to the current body and number it.

    If C{filename} differs from the file currently in use, the store first
    switches to it (creating the required nodes when C{createifmissing} is
    true). Returns the new unit, whose id is the incremented message
    number, or None when the switch fails.
    """
    needs_switch = self._filename != filename
    if needs_switch and not self.switchfile(filename, createifmissing):
        return None
    unit = super(xlifffile, self).addsourceunit(source)
    self._messagenum = self._messagenum + 1
    unit.setid("%d" % self._messagenum)
    return unit
635
def switchfile(self, filename, createifmissing=False):
    """Make the body of the file called C{filename} the current body.

    The file node is looked up by name. If it is missing and
    C{createifmissing} is true, a new file node is created and appended to
    the document root; the same flag controls whether a missing body node
    is created.

    On success C{self._filename}, C{self.body} and C{self._messagenum}
    (the number of trans-unit descendants of the body) are updated and True
    is returned. If the file node or its body is missing and may not be
    created, False is returned and those three attributes are left
    untouched.
    """
    filenode = self.getfilenode(filename)
    if filenode is None:
        if not createifmissing:
            return False
        filenode = self.createfilenode(filename)
        self.document.getroot().append(filenode)

    body = self.getbodynode(filenode, createifmissing=createifmissing)
    if body is None:
        return False

    # Commit the new state only once the switch is known to succeed. If
    # _filename were updated up front, a failed switch would make later
    # calls for the same filename skip switching and work on a stale (or
    # None) body.
    self._filename = filename
    self.body = body
    self._messagenum = len(list(body.iterdescendants(self.namespaced("trans-unit"))))
    return True
656
def creategroup(self, filename="NoName", createifmissing=False, restype=None):
    """Append a "group" element to the body of the file called C{filename}.

    If that file is not the one currently in use, the store switches to it
    first; None is returned when the switch fails. A truthy C{restype} is
    stored in the group's "restype" attribute. Returns the new element.
    """
    if self._filename != filename and not self.switchfile(filename, createifmissing):
        return None
    group = etree.SubElement(self.body, self.namespaced("group"))
    if restype:
        group.set("restype", restype)
    return group
666
670
682 parsestring = classmethod(parsestring)
683