1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 try:
26 from lxml import etree
27 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \
28 getXMLspace, setXMLspace, namespaced
29 except ImportError, e:
30 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
31
32 from translate.storage import base
33 from translate.lang import data
34
35
37 """generate match objects for all L{re_obj} matches in L{text}."""
38 start = 0
39 max = len(text)
40 while start < max:
41 m = re_obj.search(text, start)
42 if not m:
43 break
44 yield m
45 start = m.end()
46
47
# Regular-expression sources for inline placeholders. Every pattern has
# exactly one capturing group that spans the whole placeholder.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',  # simple conversion, e.g. %d or %s
    r'(\\+.?)',               # one or more backslashes plus at most one char
    # The '$' must be escaped: unescaped it is the end-of-string anchor, so
    # the pattern could never match a positional placeholder such as %1$lx.
    r'(%[0-9]\$lx)',          # positional form, e.g. %1$lx
    r'(%[0-9]\$[a-z])',       # positional form, e.g. %1$s
    r'(<.+?>)',               # shortest <...> span
]
# Compiled once at import time, in the same order as ``placeholders``.
re_placeholders = [re.compile(ph) for ph in placeholders]
51
52
63
64
66 """
67 A single unit in the file. Provisional work is done to make several
68 languages possible.
69 """
70
71
72 rootNode = ""
73
74
75 languageNode = ""
76
77 textNode = ""
78
79 namespace = None
80 _default_xml_space = "preserve"
81 """The default handling of spacing in the absense of an xml:space
82 attribute.
83
84 This is mostly for correcting XLIFF behaviour."""
85
def __init__(self, source, empty=False, **kwargs):
    """Constructs a unit containing the given source string.

    @param source: handed on to the parent class initialiser.
    @param empty: when true, only C{_rich_source}, C{_rich_target} and
        C{_state_n} are set; no XML element is created and the parent
        initialiser is not called.
    @param kwargs: accepted but not used here.
    """
    self._rich_source = self._rich_target = None
    if not empty:
        # Create the unit's root element before the parent initialiser runs.
        root_tag = self.namespaced(self.rootNode)
        self.xmlelement = etree.Element(root_tag)
        super(LISAunit, self).__init__(source)
    else:
        self._state_n = 0
96
116
118 """Returns name in Clark notation.
119
120 For example namespaced("source") in an XLIFF document might return::
121 {urn:oasis:names:tc:xliff:document:1.1}source
122 This is needed throughout lxml.
123 """
124 return namespaced(self.namespace, name)
125
127 languageNodes = self.getlanguageNodes()
128 if len(languageNodes) > 0:
129 self.xmlelement.replace(languageNodes[0], dom_node)
130 else:
131 self.xmlelement.append(dom_node)
132
135 source_dom = property(get_source_dom, set_source_dom)
136
142
147 source = property(getsource, setsource)
148
150 languageNodes = self.getlanguageNodes()
151 assert len(languageNodes) > 0
152 if dom_node is not None:
153 if append or len(languageNodes) == 0:
154 self.xmlelement.append(dom_node)
155 else:
156 self.xmlelement.insert(1, dom_node)
157 if not append and len(languageNodes) > 1:
158 self.xmlelement.remove(languageNodes[1])
159
165 target_dom = property(get_target_dom)
166
167 - def settarget(self, text, lang='xx', append=False):
194
201 target = property(gettarget, settarget)
202
204 """Returns a xml Element setup with given parameters to represent a
205 single language entry. Has to be overridden."""
206 return None
207
236
238 """Returns a list of all nodes that contain per language information.
239 """
240 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
241
243 """Retrieves a languageNode either by language or by index"""
244 if lang is None and index is None:
245 raise KeyError("No criterea for languageNode given")
246 languageNodes = self.getlanguageNodes()
247 if lang:
248 for set in languageNodes:
249 if getXMLlang(set) == lang:
250 return set
251 else:
252 if index >= len(languageNodes):
253 return None
254 else:
255 return languageNodes[index]
256 return None
257
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieves the term from the given languageNode.

    If C{self.textNode} is set, the text is read from the first descendant
    of C{languageNode} carrying that (namespaced) tag; otherwise it is read
    from C{languageNode} itself.

    @param languageNode: the element to read from; may be None.
    @param xml_space: passed through unchanged to L{getText}.
    @return: whatever L{getText} returns, or None when C{languageNode} is
        None or has no matching descendant.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)
    # iterdescendants() hands back an iterator, never None, so emptiness
    # has to be detected by iterating. Returning from the first pass picks
    # the first match; falling out of the loop means nothing matched.
    for term in languageNode.iterdescendants(self.namespaced(self.textNode)):
        return getText(term, xml_space)
    return None
270
272 return etree.tostring(self.xmlelement, pretty_print=True,
273 encoding='utf-8')
274
277
278 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
279 lambda self, value: self._set_property(self.namespaced('xid'), value))
280
281 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
282 lambda self, value: self._set_property(self.namespaced('rid'), value))
283
285 term = cls(None, empty=True)
286 term.xmlelement = element
287 return term
288 createfromxmlElement = classmethod(createfromxmlElement)
289
290
292 """A class representing a file store for one of the LISA file formats."""
293 UnitClass = LISAunit
294
295 rootNode = ""
296
297 bodyNode = ""
298
299 XMLskeleton = ""
300
301 namespace = None
302
303 - def __init__(self, inputfile=None, sourcelanguage='en',
304 targetlanguage=None, unitclass=None):
317
319 """Method to be overridden to initialise headers, etc."""
320 pass
321
323 """Returns name in Clark notation.
324
325 For example namespaced("source") in an XLIFF document might return::
326 {urn:oasis:names:tc:xliff:document:1.1}source
327 This is needed throughout lxml.
328 """
329 return namespaced(self.namespace, name)
330
def initbody(self):
    """Stores the document's body element on self.body so it does not
    have to be looked up in the XML again.

    The root element's default namespace (the C{nsmap} entry for the None
    prefix) is stored on self.namespace first, so that it is in place when
    the body tag name is built with namespaced().
    """
    root = self.document.getroot()
    self.namespace = root.nsmap.get(None, None)
    body_tag = self.namespaced(self.bodyNode)
    self.body = self.document.find('//%s' % body_tag)
336
338
339 """Adds and returns a new unit with the given string as first entry."""
340 newunit = self.UnitClass(source)
341 self.addunit(newunit)
342 return newunit
343
344 - def addunit(self, unit, new=True):
349
351 """Converts to a string containing the file's XML"""
352 return etree.tostring(self.document, pretty_print=True,
353 xml_declaration=True, encoding='utf-8')
354
356 """Populates this object from the given xml string"""
357 if not hasattr(self, 'filename'):
358 self.filename = getattr(xml, 'name', '')
359 if hasattr(xml, "read"):
360 xml.seek(0)
361 posrc = xml.read()
362 xml = posrc
363 if etree.LXML_VERSION >= (2, 1, 0):
364
365
366 parser = etree.XMLParser(strip_cdata=False)
367 else:
368 parser = etree.XMLParser()
369 self.document = etree.fromstring(xml, parser).getroottree()
370 self._encoding = self.document.docinfo.encoding
371 self.initbody()
372 assert self.document.getroot().tag == self.namespaced(self.rootNode)
373 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
374 term = self.UnitClass.createfromxmlElement(entry)
375 self.addunit(term, new=False)
376