Package translate :: Package misc :: Module ourdom
[hide private]
[frames | no frames]

Source Code for Module translate.misc.ourdom

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """module that provides modified DOM functionality for our needs 
 24   
 25  Note that users of ourdom should ensure that no code might still use classes  
 26  directly from minidom, like minidom.Element, minidom.Document or methods such  
 27  as minidom.parseString, since the functionality provided here will not be in  
 28  those objects. 
 29  """ 
 30   
 31  from xml.dom import minidom 
 32  from xml.dom import expatbuilder 
 33   
 34  # helper functions we use to do xml the way we want, used by modified classes below 
 35   
def writexml_helper(self, writer, indent="", addindent="", newl=""):
    """A replacement for writexml that formats it like typical XML files.

    Nodes are indented, but text nodes, where whitespace can be significant,
    are not indented.

    self is the element being written (this helper serves as the body of an
    element's writexml method), writer is any object with a write() method,
    indent is the current indentation, addindent is the indentation to add
    for each deeper level and newl is the newline string.
    """
    writer.write(indent + "<" + self.tagName)

    attrs = self._get_attributes()
    # sorted() works whether keys() hands back a list or another iterable;
    # the previous keys() + in-place sort() required a real list.
    for a_name in sorted(attrs.keys()):
        writer.write(" %s=\"" % a_name)
        minidom._write_data(writer, attrs[a_name].value)
        writer.write("\"")

    if not self.childNodes:
        # No children at all: emit a self-closing tag.
        writer.write("/>%s" % (newl))
        return

    # We need to write text nodes without newline and indentation, so
    # we handle them differently. Note that we here assume that "empty"
    # text nodes can be done away with (see the strip()). Note also that
    # nested tags in a text node (like ph tags in xliff) should also not
    # have newlines and indentation or an extra newline, since that will
    # alter the text node.
    haveText = False
    for childNode in self.childNodes:
        if childNode.nodeType == self.TEXT_NODE and childNode.data.strip():
            haveText = True
            break

    if haveText:
        # Mixed/text content: write every child inline, untouched.
        writer.write(">")
        for node in self.childNodes:
            node.writexml(writer, "", "", "")
        writer.write("</%s>%s" % (self.tagName, newl))
    else:
        # This is the normal case that we do with pretty layout;
        # whitespace-only text children are dropped.
        writer.write(">%s" % (newl))
        for node in self.childNodes:
            if node.nodeType != self.TEXT_NODE:
                node.writexml(writer, indent + addindent, addindent, newl)
        writer.write("%s</%s>%s" % (indent, self.tagName, newl))
78
def getElementsByTagName_helper(parent, name, dummy=None):
    """Yield the elements below parent whose tag is name ("*" matches all).

    This is a generator, so it is not interchangeable with the list-returning
    getElementsByTagName; the classes in this module expose it under the
    name yieldElementsByTagName instead. The dummy argument is not used.
    """
    element_type = minidom.Node.ELEMENT_NODE
    match_any = (name == "*")
    for child in parent.childNodes:
        if child.nodeType == element_type and (match_any or child.tagName == name):
            yield child
        if not child.hasChildNodes():
            continue
        # Descendants are collected through the child's own
        # getElementsByTagName and passed on one by one.
        for descendant in child.getElementsByTagName(name):
            yield descendant
92
def searchElementsByTagName_helper(parent, name, onlysearch):
    """Yield matching elements, descending only into tags listed in onlysearch.

    Direct element children of parent that match name ("*" matches all) are
    yielded; the search continues below a child only when that child's tag
    name occurs in onlysearch.
    """
    for child in parent.childNodes:
        if child.nodeType != minidom.Node.ELEMENT_NODE:
            continue
        if name == "*" or child.tagName == name:
            yield child
        if child.tagName in onlysearch:
            for found in child.searchElementsByTagName(name, onlysearch):
                yield found
102
def getFirstElementByTagName(node, name):
    """Return the first element that node.yieldElementsByTagName(name)
    produces, or None when it produces nothing.

    A for loop is used instead of calling the iterator's next() method so
    that the result may be any iterable (a generator or a plain list).
    """
    for result in node.yieldElementsByTagName(name):
        return result
    return None
115
def getnodetext(node):
    """Return the concatenated data of node's direct text children.

    None is accepted and gives an empty string; text nested inside child
    elements is not included.
    """
    if node is None:
        return ""
    pieces = []
    for child in node.childNodes:
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return "".join(pieces)
121 122 # various modifications to minidom classes to add functionality we like 123
class DOMImplementation(minidom.DOMImplementation):
    """A minidom.DOMImplementation whose documents are this module's Document."""
    def _create_document(self):
        # Hand back our own Document subclass instead of a stock minidom one.
        return Document()
127
class Element(minidom.Element):
    """minidom.Element extended with iterator searches and our XML layout."""

    def yieldElementsByTagName(self, name):
        """Iterate over descendant elements called name ("*" matches all)."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Iterate over matching elements, descending only into the tags
        listed in onlysearch."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element to writer using writexml_helper's layout.

        The formatting arguments default to empty strings, as they do in
        minidom, so that writexml(writer) can be called on its own.
        """
        return writexml_helper(self, writer, indent, addindent, newl)
135
class Document(minidom.Document):
    """minidom.Document that creates this module's Element objects."""

    # DOM implementation advertised by documents of this class.
    implementation = DOMImplementation()

    def yieldElementsByTagName(self, name):
        """Iterate over elements in the document called name ("*" matches all)."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Iterate over matching elements, descending only into the tags
        listed in onlysearch."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def createElement(self, tagName):
        """Create one of our Element objects owned by this document."""
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create a namespaced Element owned by this document."""
        # _nssplit lives in minidom and is not imported into this module, so
        # it has to be reached through the module; the bare name raised
        # NameError.
        prefix, _localName = minidom._nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e
# Shared instance of our DOMImplementation; ExpatBuilderNS.reset() creates
# each new document through it.
theDOMImplementation = DOMImplementation()

# an ExpatBuilder that allows us to use the above modifications
class ExpatBuilderNS(expatbuilder.ExpatBuilderNS):
    """Namespace-aware expat builder that builds trees out of this module's
    Document and Element classes."""

    def reset(self):
        """Free all data structures used during DOM construction."""
        # The document is created through the module-level
        # theDOMImplementation rather than the stock minidom implementation.
        self.document = theDOMImplementation.createDocument(
            expatbuilder.EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False
        self._initNamespaces()

    def start_element_handler(self, name, attributes):
        """Create an Element for the start tag, append it below the current
        node and make it the current node.

        attributes is read as a flat sequence of alternating names and
        values (name at index i, value at index i+1).
        """
        # all we want to do is construct our own Element instead of minidom.Element
        # unfortunately the only way to do this is to copy this whole function from expatbuilder.py
        if ' ' in name:
            # A name containing a space is split into its namespace parts.
            uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
        else:
            uri = expatbuilder.EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = expatbuilder.EMPTY_PREFIX
        node = Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        expatbuilder._append_child(self.curNode, node)
        self.curNode = node

        if self._ns_ordered_prefixes:
            # Turn each pending (prefix, uri) pair into an xmlns / xmlns:prefix
            # attribute on the new element, then empty the pending list.
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
                                     expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
                                     "xmlns", expatbuilder.EMPTY_PREFIX)
                # Values are stored straight into the instance dictionaries of
                # the attribute and of its first child node.
                # NOTE(review): presumably this bypasses minidom's attribute
                # setters, as in the expatbuilder code this was copied from;
                # it relies on Attr having a child node and a __dict__ -
                # confirm against the minidom version in use.
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = uri
                d = a.__dict__
                d['value'] = d['nodeValue'] = uri
                d['ownerDocument'] = self.document
                expatbuilder._set_attribute_node(node, a)
            del self._ns_ordered_prefixes[:]

        if attributes:
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # Walk the flat list two items at a time: name, then value.
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
                                     aname, expatbuilder.EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
                # Same direct instance-dictionary writes as for the xmlns
                # attributes above.
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = value
                d = a.__dict__
                d['ownerDocument'] = self.document
                d['value'] = d['nodeValue'] = value
                d['ownerElement'] = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        #
        def end_element_handler(self, name):
            """Check that the closing tag matches the current node, then move
            back up to its parent and finish the element."""
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)

# parser methods that use our modified xml classes
241 242 # parser methods that use our modified xml classes 243
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    A string argument is treated as a filename, which is opened in binary
    mode and closed again after parsing; anything else is passed to the
    builder as an open file object. The parser and bufsize arguments are
    accepted but not used.
    """
    builder = ExpatBuilderNS()
    if not isinstance(file, basestring):
        return builder.parseFile(file)
    handle = open(file, 'rb')
    try:
        return builder.parseFile(handle)
    finally:
        handle.close()
256
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    The parser argument is accepted but not used.
    """
    return ExpatBuilderNS().parseString(string)
261