xmlparser.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2007-2012 by Johan De Taeye, frePPLe bvba *
4  * *
5  * This library is free software; you can redistribute it and/or modify it *
6  * under the terms of the GNU Affero General Public License as published *
7  * by the Free Software Foundation; either version 3 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This library is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU Affero General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU Affero General Public *
16  * License along with this program. *
17  * If not, see <http://www.gnu.org/licenses/>. *
18  * *
19  ***************************************************************************/
20 
21 #define FREPPLE_CORE
22 #include "frepple/utils.h"
23 #include <sys/stat.h>
24 
25 /* Uncomment the next line to create a lot of debugging messages during
26  * the parsing of XML-data. */
27 //#define PARSE_DEBUG
28 
29 // With VC++ we use the Win32 functions to browse a directory
30 #ifdef _MSC_VER
31 #define WIN32_LEAN_AND_MEAN
32 #include <windows.h>
33 #else
34 // With Unix-like systems we use a check suggested by the autoconf tools
35 #if HAVE_DIRENT_H
36 # include <dirent.h>
37 # define NAMLEN(dirent) strlen((dirent)->d_name)
38 #else
39 # define dirent direct
40 # define NAMLEN(dirent) (dirent)->d_namlen
41 # if HAVE_SYS_NDIR_H
42 # include <sys/ndir.h>
43 # endif
44 # if HAVE_SYS_DIR_H
45 # include <sys/dir.h>
46 # endif
47 # if HAVE_NDIR_H
48 # include <ndir.h>
49 # endif
50 #endif
51 #endif
52 
53 
54 namespace frepple
55 {
56 namespace utils
57 {
58 
62 
63 
64 DECLARE_EXPORT void XMLInput::processingInstruction
65 (const XMLCh *const target, const XMLCh *const data)
66 {
67  char* type = xercesc::XMLString::transcode(target);
68  char* value = xercesc::XMLString::transcode(data);
69  try
70  {
71  if (!strcmp(type,"python"))
72  {
73  // "python" is the only processing instruction which we process.
74  // Others will be silently ignored
75  try
76  {
77  // Execute the processing instruction
79  }
80  catch (const DataException& e)
81  {
82  if (abortOnDataException)
83  {
84  xercesc::XMLString::release(&type);
85  xercesc::XMLString::release(&value);
86  throw;
87  }
88  else logger << "Continuing after data error: " << e.what() << endl;
89  }
90  }
91  xercesc::XMLString::release(&type);
92  xercesc::XMLString::release(&value);
93  }
94  catch (...)
95  {
96  xercesc::XMLString::release(&type);
97  xercesc::XMLString::release(&value);
98  throw;
99  }
100 }
101 
102 
103 DECLARE_EXPORT void XMLInput::startElement(const XMLCh* const uri,
104  const XMLCh* const n, const XMLCh* const qname,
105  const xercesc::Attributes& atts)
106 {
107  // Validate the state
108  assert(!states.empty());
109 
110  // Check for excessive number of open objects
111  if (numElements >= maxdepth)
112  throw DataException("XML-document with elements nested excessively deep");
113 
114  // Push the element on the stack
115  datapair *pElement = &m_EStack[numElements+1];
116  pElement->first.reset(n);
117  pElement->second.reset();
118 
119  // Store a pointer to the attributes
120  attributes = &atts;
121 
122  switch (states.top())
123  {
124  case SHUTDOWN:
125  // STATE: Parser is shutting down, and we can ignore all input that
126  // is still coming
127  return;
128 
129  case IGNOREINPUT:
130  // STATE: Parser is ignoring a part of the input
131  if (pElement->first.getHash() == endingHashes.top())
132  // Increase the count of occurences before the ignore section ends
133  ++ignore;
134  ++numElements;
135  return;
136 
137  case INIT:
138  // STATE: The only time the parser comes in this state is when we read
139  // opening tag of the ROOT tag.
140 #ifdef PARSE_DEBUG
141  if (!m_EHStack.empty())
142  logger << "Initialize root tag for reading object "
143  << getCurrentObject() << " ("
144  << typeid(*getCurrentObject()).name() << ")" << endl;
145  else
146  logger << "Initialize root tag for reading object NULL" << endl;
147 #endif
148  states.top() = READOBJECT;
149  endingHashes.push(pElement->first.getHash());
150  // Note that there is no break or return here. We also execute the
151  // statements of the following switch-case.
152 
153  case READOBJECT:
154  // STATE: Parser is reading data elements of an object
155  // Debug
156 #ifdef PARSE_DEBUG
157  logger << " Start element " << pElement->first.getName()
158  << " - object " << getCurrentObject() << endl;
159 #endif
160 
161  // Call the handler of the object
162  assert(!m_EHStack.empty());
163  try {getCurrentObject()->beginElement(*this, pElement->first);}
164  catch (const DataException& e)
165  {
166  if (abortOnDataException) throw;
167  else logger << "Continuing after data error: " << e.what() << endl;
168  }
169 
170  // Now process all attributes. For attributes we only call the
171  // endElement() member and skip the beginElement() method.
172  numElements += 1;
173  if (states.top() != IGNOREINPUT)
174  for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++)
175  {
176  char* val = xercesc::XMLString::transcode(atts.getValue(i));
177  m_EStack[numElements+1].first.reset(atts.getLocalName(i));
178  m_EStack[numElements+1].second.setData(val);
179 #ifdef PARSE_DEBUG
180  char* attname = xercesc::XMLString::transcode(atts.getQName(i));
181  logger << " Processing attribute " << attname
182  << " - object " << getCurrentObject() << endl;
183  xercesc::XMLString::release(&attname);
184 #endif
185  try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);}
186  catch (const DataException& e)
187  {
188  if (abortOnDataException) throw;
189  else logger << "Continuing after data error: " << e.what() << endl;
190  }
191  xercesc::XMLString::release(&val);
192  // Stop processing attributes if we are now in the ignore mode
193  if (states.top() == IGNOREINPUT) break;
194  }
195  } // End of switch statement
196 
197  // Outside of this handler, no attributes are available
198  attributes = NULL;
199 }
200 
201 
202 DECLARE_EXPORT void XMLInput::endElement(const XMLCh* const uri,
203  const XMLCh* const s,
204  const XMLCh* const qname)
205 {
206  // Validate the state
207  assert(numElements >= 0);
208  assert(!states.empty());
209  assert(numElements < maxdepth);
210 
211  // Remove an element from the stack
212  datapair *pElement = &(m_EStack[numElements--]);
213 
214  switch (states.top())
215  {
216  case INIT:
217  // This should never happen!
218  throw LogicException("Unreachable code reached");
219 
220  case SHUTDOWN:
221  // STATE: Parser is shutting down, and we can ignore all input that is
222  // still coming
223  return;
224 
225  case IGNOREINPUT:
226  // STATE: Parser is ignoring a part of the input
227 #ifdef PARSE_DEBUG
228  logger << " End element " << pElement->first.getName()
229  << " - IGNOREINPUT state" << endl;
230 #endif
231  // Continue if we aren't dealing with the tag being ignored
232  if (pElement->first.getHash() != endingHashes.top()) return;
233  if (ignore == 0)
234  {
235  // Finished ignoring now
236  states.pop();
237  endingHashes.pop();
238 #ifdef PARSE_DEBUG
239  logger << "Finish IGNOREINPUT state" << endl;
240 #endif
241  }
242  else
243  --ignore;
244  break;
245 
246  case READOBJECT:
247  // STATE: Parser is reading data elements of an object
248 #ifdef PARSE_DEBUG
249  logger << " End element " << pElement->first.getName()
250  << " - object " << getCurrentObject() << endl;
251 #endif
252 
253  // Check if we finished with the current handler
254  assert(!m_EHStack.empty());
255  if (pElement->first.getHash() == endingHashes.top())
256  {
257  // Call the ending handler of the Object, with a special
258  // flag to specify that this object is now ended
259  objectEnded = true;
260  try
261  {
262  getCurrentObject()->endElement(*this, pElement->first, pElement->second);
263  if (userexit) userexit.call(getCurrentObject());
264  }
265  catch (const DataException& e)
266  {
267  if (abortOnDataException) throw;
268  else logger << "Continuing after data error: " << e.what() << endl;
269  }
270  objectEnded = false;
271 #ifdef PARSE_DEBUG
272  logger << "Finish reading object " << getCurrentObject() << endl;
273 #endif
274  // Pop from the handler object stack
275  prev = getCurrentObject();
276  m_EHStack.pop_back();
277  endingHashes.pop();
278 
279  // Pop from the state stack
280  states.pop();
281  if (m_EHStack.empty())
282  shutdown();
283  else
284  {
285  // Call also the endElement function on the owning object
286  try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
287  catch (const DataException& e)
288  {
289  if (abortOnDataException) throw;
290  else logger << "Continuing after data error: " << e.what() << endl;
291  }
292 #ifdef PARSE_DEBUG
293  logger << " End element " << pElement->first.getName()
294  << " - object " << getCurrentObject() << endl;
295 #endif
296  }
297  }
298  else
299  // This tag is not the ending tag of an object
300  // Call the function of the Object
301  try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
302  catch (const DataException& e)
303  {
304  if (abortOnDataException) throw;
305  else logger << "Continuing after data error: " << e.what() << endl;
306  }
307  }
308 }
309 
310 
311 // Unfortunately the prototype for this handler function differs between
312 // Xerces-c 2.x and 3.x
313 #if XERCES_VERSION_MAJOR==2
314 DECLARE_EXPORT void XMLInput::characters(const XMLCh *const c, const unsigned int n)
315 #else
316 DECLARE_EXPORT void XMLInput::characters(const XMLCh *const c, const XMLSize_t n)
317 #endif
318 {
319  // No data capture during the ignore state
320  if (states.top()==IGNOREINPUT) return;
321 
322  // Process the data
323  char* name = xercesc::XMLString::transcode(c);
324  m_EStack[numElements].second.addData(name, strlen(name));
325  xercesc::XMLString::release(&name);
326 }
327 
328 
329 DECLARE_EXPORT void XMLInput::warning(const xercesc::SAXParseException& e)
330 {
331  char* message = xercesc::XMLString::transcode(e.getMessage());
332  logger << "Warning: " << message;
333  if (e.getLineNumber() > 0) logger << " at line: " << e.getLineNumber();
334  logger << endl;
335  xercesc::XMLString::release(&message);
336 }
337 
338 
339 DECLARE_EXPORT void XMLInput::fatalError(const xercesc::SAXParseException& e)
340 {
341  char* message = xercesc::XMLString::transcode(e.getMessage());
342  ostringstream ch;
343  ch << message;
344  if (e.getLineNumber() > 0) ch << " at line " << e.getLineNumber();
345  xercesc::XMLString::release(&message);
346  throw DataException(ch.str());
347 }
348 
349 
350 DECLARE_EXPORT void XMLInput::error(const xercesc::SAXParseException& e)
351 {
352  char* message = xercesc::XMLString::transcode(e.getMessage());
353  ostringstream ch;
354  ch << message;
355  if (e.getLineNumber() > 0) ch << " at line " << e.getLineNumber();
356  xercesc::XMLString::release(&message);
357  throw DataException(ch.str());
358 }
359 
360 
// NOTE(review): the signature line of this function is missing from this
// listing. The body uses a pointer named pPI as its only input.
//
// Redirects the input that follows to another handler: the hash of the
// element in stack slot numElements+1 is remembered as the tag that will
// end the section. A non-NULL pPI is pushed on the handler stack and the
// parser enters the READOBJECT state; with a NULL pPI the parser enters the
// IGNOREINPUT state instead.
362 {
363  // Keep track of the tag where this object will end
364  assert(numElements >= -1);
365  endingHashes.push(m_EStack[numElements+1].first.getHash());
366  if (pPI)
367  {
368  // Push a new object on the handler stack
369 #ifdef PARSE_DEBUG
370  logger << "Start reading object " << pPI
371  << " (" << typeid(*pPI).name() << ")" << endl;
372 #endif
     // Remember the handler that was active before the switch
373  prev = getCurrentObject();
374  m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL)));
375  states.push(READOBJECT);
376  }
377  else
378  {
379  // Ignore the complete content of this element
380 #ifdef PARSE_DEBUG
381  logger << "Start ignoring input" << endl;
382 #endif
383  states.push(IGNOREINPUT);
384  }
385 }
386 
387 
// NOTE(review): the signature line of this function is missing from this
// listing. endElement() calls shutdown() without arguments when the handler
// stack has become empty, which is presumably this function.
//
// Puts the parser in the SHUTDOWN state. When elements are still open, all
// objects left on the handler stack get a final endElement() call with the
// objectEnded flag set and a dummy tag, and are then removed from the stack.
// A DataException raised by such a call is rethrown when
// abortOnDataException is set, and logged otherwise.
389 {
390  // Already shutting down...
391  if (states.empty() || states.top() == SHUTDOWN) return;
392 
393  // Message
394 #ifdef PARSE_DEBUG
395  logger << " Forcing a shutdown - SHUTDOWN state" << endl;
396 #endif
397 
398  // Change the state
399  states.push(SHUTDOWN);
400 
401  // Done if we have no elements on the stack, i.e. a normal end.
402  if (numElements<0) return;
403 
404  // Call the ending handling of all objects on the stack
405  // This allows them to finish off in a valid state, and delete any temporary
406  // objects they may have allocated.
407  objectEnded = true;
     // The current stack slot is overwritten with a dummy tag and an empty value
408  m_EStack[numElements].first.reset("Not a real tag");
409  m_EStack[numElements].second.reset();
410  while (!m_EHStack.empty())
411  {
412  try
413  {
414  getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);
415  if (userexit) userexit.call(getCurrentObject());
416  }
417  catch (const DataException& e)
418  {
419  if (abortOnDataException) throw;
420  else logger << "Continuing after data error: " << e.what() << endl;
421  }
422  m_EHStack.pop_back();
423  }
424 }
425 
426 
427 DECLARE_EXPORT void XMLInput::reset()
428 {
429  // Delete the xerces parser object
430  delete parser;
431  parser = NULL;
432 
433  // Call the ending handling of all objects on the stack
434  // This allows them to finish off in a valid state, and delete any temporary
435  // objects they may have allocated.
436  if (!m_EHStack.empty())
437  {
438  // The next line is to avoid calling the endElement handler twice for the
439  // last object. E.g. endElement handler causes and exception, and as part
440  // of the exception handling we call the reset method.
441  if (objectEnded) m_EHStack.pop_back();
442  objectEnded = true;
443  m_EStack[++numElements].first.reset("Not a real tag");
444  m_EStack[++numElements].second.reset();
445  while (!m_EHStack.empty())
446  {
447  try
448  {
449  getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);
450  if (userexit) userexit.call(getCurrentObject());
451  }
452  catch (const DataException& e)
453  {
454  if (abortOnDataException) throw;
455  else logger << "Continuing after data error: " << e.what() << endl;
456  }
457  m_EHStack.pop_back();
458  }
459  }
460 
461  // Cleanup of stacks
462  while (!states.empty()) states.pop();
463  while (!endingHashes.empty()) endingHashes.pop();
464 
465  // Set all variables back to their starting values
466  numElements = -1;
467  ignore = 0;
468  objectEnded = false;
469  attributes = NULL;
470 }
471 
472 
473 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate)
474 {
475  try
476  {
477  // Create a Xerces parser
478  parser = xercesc::XMLReaderFactory::createXMLReader();
479 
480  // Set the features of the parser. A bunch of the options are dependent
481  // on whether we want to validate the input or not.
482  parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*>
483  (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner));
484  parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate);
485  parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false);
486  parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false);
487  parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false);
488  parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate);
489  parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false);
490  parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true);
491  parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true);
492 
493  if (validate)
494  {
495  // Specify the no-namespace schema file
496  string schema = Environment::searchFile("frepple.xsd");
497  if (schema.empty())
498  throw RuntimeException("Can't find XML schema file 'frepple.xsd'");
499  XMLCh *c = xercesc::XMLString::transcode(schema.c_str());
500  parser->setProperty(
501  xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c
502  );
503  xercesc::XMLString::release(&c);
504  }
505 
506  // If we are reading into a NULL object, there is no need to use a
507  // content handler or a handler stack.
508  if (pRoot)
509  {
510  // Set the event handler. If we are reading into a NULL object, there is
511  // no need to use a content handler.
512  parser->setContentHandler(this);
513 
514  // Get the parser to read data into the object pRoot.
515  m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL)));
516  states.push(INIT);
517  }
518 
519  // Set the error handler
520  parser->setErrorHandler(this);
521 
522  // Parse the input
523  parser->parse(in);
524  }
525  // Note: the reset() method needs to be called in all circumstances. The
526  // reset method allows all objects to finish in a valid state and clean up
527  // any memory they may have allocated.
528  catch (const xercesc::XMLException& toCatch)
529  {
530  char* message = xercesc::XMLString::transcode(toCatch.getMessage());
531  string msg(message);
532  xercesc::XMLString::release(&message);
533  reset();
534  throw RuntimeException("Parsing error: " + msg);
535  }
536  catch (const exception& toCatch)
537  {
538  reset();
539  ostringstream msg;
540  msg << "Error during XML parsing: " << toCatch.what();
541  throw RuntimeException(msg.str());
542  }
543  catch (...)
544  {
545  reset();
546  throw RuntimeException(
547  "Parsing error: Unexpected exception during XML parsing");
548  }
549  reset();
550 }
551 
552 
553 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x)
554 {
555  for (const char* p = x.data; *p; ++p)
556  {
557  switch (*p)
558  {
559  case '&': os << "&amp;"; break;
560  case '<': os << "&lt;"; break;
561  case '>': os << "&gt;"; break;
562  case '"': os << "&quot;"; break;
563  case '\'': os << "&apos;"; break;
564  default: os << *p;
565  }
566  }
567  return os;
568 }
569 
570 
571 DECLARE_EXPORT void XMLOutput::incIndent()
572 {
573  indentstring[m_nIndent++] = '\t';
574  if (m_nIndent > 40) m_nIndent = 40;
575  indentstring[m_nIndent] = '\0';
576 }
577 
578 
579 DECLARE_EXPORT void XMLOutput::decIndent()
580 {
581  if (--m_nIndent < 0) m_nIndent = 0;
582  indentstring[m_nIndent] = '\0';
583 }
584 
585 
// NOTE(review): the line with the return type and the name of this function
// is missing from this listing.
//
// Writes an object as an XML element with the given tag. NULL pointers and
// hidden objects are skipped. While the object writes itself, it is
// registered as the current object and the previous current object as its
// parent; both pointers are restored afterwards.
// When the mode m is DEFAULT, the object is written with the REFERENCE mode
// as soon as numParents is bigger than 2.
587 (const Keyword& tag, const Object* object, mode m)
588 {
589  // Avoid NULL pointers and skip hidden objects
590  if (!object || object->getHidden()) return;
591 
592  // Adjust current and parent object pointer
593  const Object *previousParent = parentObject;
594  parentObject = currentObject;
595  currentObject = object;
596  ++numObjects;
597  ++numParents;
598 
599  // Call the write method on the object
600  if (m != DEFAULT)
601  // Mode is overwritten
602  object->writeElement(this, tag, m);
603  else
604  // Choose whether to save a reference of the object.
605  // The root object can't be saved as a reference.
606  object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT);
607 
608  // Adjust current and parent object pointer
609  --numParents;
610  currentObject = parentObject;
611  parentObject = previousParent;
612 }
613 
614 
// NOTE(review): the signature line of this function is missing from this
// listing. The body uses the names 'tag' and 'object' as inputs.
//
// Writes an object as the root element of an XML document. The opening tag
// is written with the header attributes and the object then writes itself
// with the NOHEADER mode.
// Throws a RuntimeException for a NULL object, and a LogicException when
// objects have already been written.
// NOTE(review): the statement writing the first line of the document
// (listing line 628) is also missing from this listing.
616 {
617  // Root object can't be null...
618  if (!object)
619  throw RuntimeException("Can't accept a NULL object as XML root");
620 
621  // There should not be any saved objects yet
622  if (numObjects > 0)
623  throw LogicException("Can't have multiple headers in a document");
624  assert(!parentObject);
625  assert(!currentObject);
626 
627  // Write the first line for the xml document
629 
630  // Adjust current object pointer
631  currentObject = object;
632 
633  // Write the object
634  ++numObjects;
635  ++numParents;
636  BeginObject(tag, getHeaderAtts());
637  object->writeElement(this, tag, NOHEADER);
638 
639  // Adjust current and parent object pointer
640  currentObject = NULL;
641  parentObject = NULL;
642 }
643 
644 
// NOTE(review): the signature line of this function is missing from this
// listing. The body uses the name 'tag' as input.
//
// Writes the opening tag of a document, with the header attributes, and
// increases numParents by 2.
// NOTE(review): the guard below also throws when parentObject or
// currentObject is NULL, which looks inverted for a document that hasn't
// written any object yet - confirm the intended condition.
// NOTE(review): the statement writing the first line of the document
// (listing line 652) is also missing from this listing.
646 {
647  // There should not be any saved objects yet
648  if (numObjects > 0 || !parentObject || !currentObject)
649  throw LogicException("Writing invalid header to XML document");
650 
651  // Write the first line and the opening tag
653  BeginObject(tag, getHeaderAtts());
654 
655  // Fake a dummy parent
656  numParents += 2;
657 }
658 
659 
// NOTE(review): the signature line of this function is missing from this
// listing. The body uses the name 'key' as input and returns the address
// of the member 'result'.
//
// Looks up the value of the attribute named by the keyword, and stores a
// copy of it in the member 'result'. An empty string is stored when the
// transcoded value is a NULL pointer.
// The returned pointer refers to a member that is overwritten by every
// call.
661 {
662  char* s = xercesc::XMLString::transcode(atts->getValue(key.getXMLCharacters()));
     // The const_cast lets this lookup update the cached result member
663  const_cast<XMLAttributeList*>(this)->result.setData(s ? s : "");
664  xercesc::XMLString::release(&s);
665  return &result;
666 }
667 
668 
670 {
671  switch (getData()[0])
672  {
673  case 'T':
674  case 't':
675  case '1':
676  return true;
677  case 'F':
678  case 'f':
679  case '0':
680  return false;
681  }
682  throw DataException("Invalid boolean value: " + string(getData()));
683 }
684 
685 
687 {
688  if (ch) return ch;
689  Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash);
690  if (i == Keyword::getTags().end())
691  throw LogicException("Undefined element keyword");
692  return i->second->getName().c_str();
693 }
694 
695 
696 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name)
697 {
698  // Error condition: name is empty
699  if (name.empty()) throw LogicException("Creating keyword without name");
700 
701  // Create a number of variations of the tag name
702  strStartElement = string("<") + name;
703  strEndElement = string("</") + name + ">\n";
704  strElement = string("<") + name + ">";
705  strAttribute = string(" ") + name + "=\"";
706 
707  // Compute the hash value
708  dw = hash(name.c_str());
709 
710  // Create a properly encoded Xerces string
711  xercesc::XMLPlatformUtils::Initialize();
712  xmlname = xercesc::XMLString::transcode(name.c_str());
713 
714  // Verify that the hash is "perfect".
715  check();
716 }
717 
718 
719 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace)
720  : strName(name)
721 {
722  // Error condition: name is empty
723  if (name.empty())
724  throw LogicException("Creating keyword without name");
725  if (nspace.empty())
726  throw LogicException("Creating keyword with empty namespace");
727 
728  // Create a number of variations of the tag name
729  strStartElement = string("<") + nspace + ":" + name;
730  strEndElement = string("</") + nspace + ":" + name + ">\n";
731  strElement = string("<") + nspace + ":" + name + ">";
732  strAttribute = string(" ") + nspace + ":" + name + "=\"";
733 
734  // Compute the hash value
735  dw = hash(name);
736 
737  // Create a properly encoded Xerces string
738  xercesc::XMLPlatformUtils::Initialize();
739  xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str());
740 
741  // Verify that the hash is "perfect".
742  check();
743 }
744 
745 
746 void Keyword::check()
747 {
748  // To be thread-safe we make sure only a single thread at a time
749  // can execute this check.
750  static Mutex dd;
751  {
752  ScopeMutexLock l(dd);
753  tagtable::const_iterator i = getTags().find(dw);
754  if (i!=getTags().end() && i->second->getName()!=strName)
755  throw LogicException("Tag XML-tag hash function clashes for "
756  + i->second->getName() + " and " + strName);
757  getTags().insert(make_pair(dw,this));
758  }
759 }
760 
761 
// NOTE(review): the signature line of this function is missing from this
// listing. The body undoes what the Keyword constructors set up, so it is
// presumably the destructor.
//
// Removes the entry with this hash value from the tag table, releases the
// Xerces string and calls the Xerces Terminate() function to match the
// Initialize() call made by the constructors.
763 {
764  // Remove from the tag list
765  tagtable::iterator i = getTags().find(dw);
766  if (i!=getTags().end()) getTags().erase(i);
767 
768  // Destroy the xerces string
769  xercesc::XMLString::release(&xmlname);
770  xercesc::XMLPlatformUtils::Terminate();
771 }
772 
773 
774 DECLARE_EXPORT const Keyword& Keyword::find(const char* name)
775 {
776  tagtable::const_iterator i = getTags().find(hash(name));
777  return *(i!=getTags().end() ? i->second : new Keyword(name));
778 }
779 
780 
// NOTE(review): the signature line of this function is missing from this
// listing.
//
// Gives access to the table with all keywords. The table is a static local
// variable, which is thus created the first time this function is called.
782 {
783  static tagtable alltags;
784  return alltags;
785 }
786 
787 
// NOTE(review): the signature line of this function is missing from this
// listing. The body uses a character pointer named c as input.
//
// Computes the hash value of a string. A NULL pointer and an empty string
// both give 0. Otherwise the value starts as the first character and each
// following character is folded in as hashVal*38 + (hashVal>>24) + char.
// The result is reduced modulo 954991.
789 {
790  if (c == 0 || *c == 0) return 0;
791 
792  // Compute hash
793  const char* curCh = c;
794  hashtype hashVal = *curCh++;
795  while (*curCh)
796  hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
797 
798  // Divide by modulus
799  return hashVal % 954991;
800 }
801 
802 
// NOTE(review): the signature line of this function is missing from this
// listing. The body uses a Xerces string named t as input.
//
// Computes the hash value of a Xerces string. The string is first
// transcoded to a char string, which is released again on both exit paths.
// A NULL or empty transcoded string gives 0. Otherwise the value starts as
// the first character and each following character is folded in as
// hashVal*38 + (hashVal>>24) + char. The result is reduced modulo 954991.
804 {
805  char* c = xercesc::XMLString::transcode(t);
806  if (c == 0 || *c == 0)
807  {
808  xercesc::XMLString::release(&c);
809  return 0;
810  }
811 
812  // Compute hash
813  const char* curCh = c;
814  hashtype hashVal = *curCh++;
815  while (*curCh)
816  hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
817 
818  // Divide by modulus
819  xercesc::XMLString::release(&c);
820  return hashVal % 954991;
821 }
822 
823 
// NOTE(review): the signature line of this function is missing from this
// listing.
//
// Writes all registered keywords to the log: one line per keyword, with its
// name followed by its hash value.
825 {
826  for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i)
827  logger << i->second->getName() << " " << i->second->dw << endl;
828 }
829 
830 
831 DECLARE_EXPORT void XMLInputFile::parse(Object *pRoot, bool validate)
832 {
833  // Check if string has been set
834  if (filename.empty())
835  throw DataException("Missing input file or directory");
836 
837  // Check if the parameter is the name of a directory
838  struct stat stat_p;
839  if (stat(filename.c_str(), &stat_p))
840  // Can't verify the status
841  throw RuntimeException("Couldn't open input file '" + filename + "'");
842  else if (stat_p.st_mode & S_IFDIR)
843  {
844  // Data is a directory: loop through all *.xml files now. No recursion in
845  // subdirectories is done.
846  // The code is unfortunately different for Windows & Linux. Sigh...
847 #ifdef _MSC_VER
848  string f = filename + "\\*.xml";
849  WIN32_FIND_DATA dir_entry_p;
850  HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p);
851  if (h == INVALID_HANDLE_VALUE)
852  throw RuntimeException("Couldn't open input file '" + f + "'");
853  do
854  {
855  f = filename + '/' + dir_entry_p.cFileName;
856  XMLInputFile(f.c_str()).parse(pRoot);
857  }
858  while (FindNextFile(h, &dir_entry_p));
859  FindClose(h);
860 #elif HAVE_DIRENT_H
861  struct dirent *dir_entry_p;
862  DIR *dir_p = opendir(filename.c_str());
863  while (NULL != (dir_entry_p = readdir(dir_p)))
864  {
865  int n = NAMLEN(dir_entry_p);
866  if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4))
867  {
868  string f = filename + '/' + dir_entry_p->d_name;
869  XMLInputFile(f.c_str()).parse(pRoot, validate);
870  }
871  }
872  closedir(dir_p);
873 #else
874  throw RuntimeException("Can't process a directory on your platform");
875 #endif
876  }
877  else
878  {
879  // Normal file
880  // Parse the file
881  XMLCh *f = xercesc::XMLString::transcode(filename.c_str());
882  xercesc::LocalFileInputSource in(f);
883  xercesc::XMLString::release(&f);
884  XMLInput::parse(in, pRoot, validate);
885  }
886 }
887 
888 } // end namespace
889 } // end namespace