1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.5"
61 __versionTime__ = "13 Jun 2016 19:59 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import traceback
74 from datetime import datetime
75
76
77
78 __all__ = [
79 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
80 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
81 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
82 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
83 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
84 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
85 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
86 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
87 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
88 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
89 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
90 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
91 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
92 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
93 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
94 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
95 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
96 'tokenMap', 'pyparsing_common',
97 ]
98
99 system_version = tuple(sys.version_info)[:3]
100 PY_3 = system_version[0] == 3
101 if PY_3:
102 _MAX_INT = sys.maxsize
103 basestring = str
104 unichr = chr
105 _ustr = str
106
107
108 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
109
110 else:
111 _MAX_INT = sys.maxint
112 range = xrange
115 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
116 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
117 then < returns the unicode object | encodes it with the default encoding | ... >.
118 """
119 if isinstance(obj,unicode):
120 return obj
121
122 try:
123
124
125 return str(obj)
126
127 except UnicodeEncodeError:
128
129 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
130 xmlcharref = Regex('&#\d+;')
131 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
132 return xmlcharref.transformString(ret)
133
134
135 singleArgBuiltins = []
136 import __builtin__
137 for fname in "sum len sorted reversed list tuple set any all min max".split():
138 try:
139 singleArgBuiltins.append(getattr(__builtin__,fname))
140 except AttributeError:
141 continue
142
143 _generatorType = type((y for y in range(1)))
146 """Escape &, <, >, ", ', etc. in a string of data."""
147
148
149 from_symbols = '&><"\''
150 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
151 for from_,to_ in zip(from_symbols, to_symbols):
152 data = data.replace(from_, to_)
153 return data
154
157
158 alphas = string.ascii_uppercase + string.ascii_lowercase
159 nums = "0123456789"
160 hexnums = nums + "ABCDEFabcdef"
161 alphanums = alphas + nums
162 _bslash = chr(92)
163 printables = "".join(c for c in string.printable if c not in string.whitespace)
166 """base exception class for all parsing runtime exceptions"""
167
168
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """Record the location and description of a parsing failure.

    When C{msg} is omitted, C{pstr} is treated as the message and the
    stored input string is left empty; otherwise C{pstr} is stored as the
    input string and C{msg} as the message.  C{elem} is kept as
    C{parserElement}.
    """
    if msg is None:
        message, text = pstr, ""
    else:
        message, text = msg, pstr
    self.loc = loc
    self.msg = message
    self.pstr = text
    self.parserElement = elem
178
180 """supported attributes by name are:
181 - lineno - returns the line number of the exception text
182 - col - returns the column number of the exception text
183 - line - returns the line containing the exception text
184 """
185 if( aname == "lineno" ):
186 return lineno( self.loc, self.pstr )
187 elif( aname in ("col", "column") ):
188 return col( self.loc, self.pstr )
189 elif( aname == "line" ):
190 return line( self.loc, self.pstr )
191 else:
192 raise AttributeError(aname)
193
195 return "%s (at char %d), (line:%d, col:%d)" % \
196 ( self.msg, self.loc, self.lineno, self.column )
210 return "lineno col line".split() + dir(type(self))
211
213 """exception thrown when parse expressions don't match class;
214 supported attributes by name are:
215 - lineno - returns the line number of the exception text
216 - col - returns the column number of the exception text
217 - line - returns the line containing the exception text
218 """
219 pass
220
222 """user-throwable exception thrown when inconsistent parse content
223 is found; stops all parsing immediately"""
224 pass
225
227 """just like C{L{ParseFatalException}}, but thrown internally when an
228 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
229 an unbacktrackable syntax error has been found"""
233
248 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Keep the supplied list of parse elements as C{parseElementTrace}.

    The argument is stored as-is (no copy is made).
    """
    self.parseElementTrace = parseElementList
251
253 return "RecursiveGrammarException: %s" % self.parseElementTrace
254
261 return repr(self.tup)
263 self.tup = (self.tup[0],i)
264
266 """Structured parse results, to provide multiple means of access to the parsed data:
267 - as a list (C{len(results)})
268 - by list index (C{results[0], results[1]}, etc.)
269 - by attribute (C{results.<resultsName>})
270 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Return C{toklist} itself when it is already an instance of C{cls};
    otherwise allocate a new object flagged as needing initialization."""
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # tells __init__ that the instance state has not been set up yet
        fresh.__doinit = True
        return fresh
    return toklist
277
278
279
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Initialize the token list and, when C{name} is given, register the
    tokens under that results name.

     - toklist - a list (copied), a generator (materialized into a list), or
       any other object (wrapped in a one-element list); None means empty
     - name - optional results name; an int name is converted with C{_ustr}
     - asList - when true, the named entry is stored as a C{ParseResults}
     - modal - when false, C{name} is recorded in the accumulated-names dict

    The C{isinstance} parameter defaults to the builtin of the same name, so
    it is available as a local inside this method.
    """
    # __doinit is set by __new__ on freshly allocated objects; when __new__
    # handed back an existing instance, this state setup is skipped
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            self.__accumNames[name] = 0
        if isinstance(name,int):
            name = _ustr(name)
        self.__name = name
        # nothing is stored under the name when toklist is None, '' or []
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                self[name].__name = name
            else:
                # store the first token when toklist can be indexed,
                # otherwise store toklist itself
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    self[name] = toklist
318
320 if isinstance( i, (int,slice) ):
321 return self.__toklist[i]
322 else:
323 if i not in self.__accumNames:
324 return self.__tokdict[i][-1][0]
325 else:
326 return ParseResults([ v[0] for v in self.__tokdict[i] ])
327
329 if isinstance(v,_ParseResultsWithOffset):
330 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
331 sub = v[0]
332 elif isinstance(k,(int,slice)):
333 self.__toklist[k] = v
334 sub = v
335 else:
336 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
337 sub = v
338 if isinstance(sub,ParseResults):
339 sub.__parent = wkref(self)
340
342 if isinstance(i,(int,slice)):
343 mylen = len( self.__toklist )
344 del self.__toklist[i]
345
346
347 if isinstance(i, int):
348 if i < 0:
349 i += mylen
350 i = slice(i, i+1)
351
352 removed = list(range(*i.indices(mylen)))
353 removed.reverse()
354
355
356
357
358
359
360 for name,occurrences in self.__tokdict.items():
361 for j in removed:
362 for k, (value, position) in enumerate(occurrences):
363 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
364 else:
365 del self.__tokdict[i]
366
368 return k in self.__tokdict
369
def __len__(self):
    """Number of tokens in the token list."""
    return len(self.__toklist)

def __bool__(self):
    """True when the token list is non-empty."""
    return bool(self.__toklist)

__nonzero__ = __bool__

def __iter__(self):
    """Iterate over the tokens in order."""
    return iter(self.__toklist)

def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter(backwards)
376 if hasattr(self.__tokdict, "iterkeys"):
377 return self.__tokdict.iterkeys()
378 else:
379 return iter(self.__tokdict)
380
382 return (self[k] for k in self._iterkeys())
383
385 return ((k, self[k]) for k in self._iterkeys())
386
387 if PY_3:
388 keys = _iterkeys
389 """Returns an iterator of all named result keys (Python 3.x only)."""
390
391 values = _itervalues
392 """Returns an iterator of all named result values (Python 3.x only)."""
393
394 items = _iteritems
395 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
396
397 else:
398 iterkeys = _iterkeys
399 """Returns an iterator of all named result keys (Python 2.x only)."""
400
401 itervalues = _itervalues
402 """Returns an iterator of all named result values (Python 2.x only)."""
403
404 iteritems = _iteritems
405 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
406
408 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
409 return list(self.iterkeys())
410
412 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
413 return list(self.itervalues())
414
416 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
417 return list(self.iteritems())
418
420 """Since keys() returns an iterator, this method is helpful in bypassing
421 code that looks for the existence of any defined results names."""
422 return bool(self.__tokdict)
423
def pop(self, *args, **kwargs):
    """Remove and return an item, using list or dict semantics.

    With no argument the last token is popped.  An integer argument, a
    single argument of any type, or a key that is present is looked up with
    C{self[key]}, deleted, and returned.  Otherwise (a missing non-integer
    key together with a second argument) the second argument is returned
    as the default, as in C{dict.pop()}.  The default may also be passed as
    the keyword argument C{default}; any other keyword raises C{TypeError}.
    """
    if not args:
        args = [-1]
    for kw, val in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        args = (args[0], val)

    key = args[0]
    if not (isinstance(key, int) or len(args) == 1 or key in self):
        # missing key, but a default value was supplied
        return args[1]

    popped = self[key]
    del self[key]
    return popped
450
def get(self, key, defaultValue=None):
    """Look up C{key}; when it is not present, return C{defaultValue}
    (C{None} unless specified) instead."""
    return self[key] if key in self else defaultValue
459
def insert(self, index, insStr):
    """Insert C{insStr} at position C{index} in the list of parsed tokens.

    Every named-result entry whose recorded position is greater than
    C{index} has that position increased by one.
    """
    self.__toklist.insert(index, insStr)
    for entries in self.__tokdict.values():
        for slot, (value, position) in enumerate(entries):
            shifted = position + (position > index)
            entries[slot] = _ParseResultsWithOffset(value, shifted)
471
473 """Add single element to end of ParseResults list of elements."""
474 self.__toklist.append(item)
475
477 """Add sequence of elements to end of ParseResults list of elements."""
478 if isinstance(itemseq, ParseResults):
479 self += itemseq
480 else:
481 self.__toklist.extend(itemseq)
482
484 """Clear all elements and results names."""
485 del self.__toklist[:]
486 self.__tokdict.clear()
487
489 try:
490 return self[name]
491 except KeyError:
492 return ""
493
494 if name in self.__tokdict:
495 if name not in self.__accumNames:
496 return self.__tokdict[name][-1][0]
497 else:
498 return ParseResults([ v[0] for v in self.__tokdict[name] ])
499 else:
500 return ""
501
503 ret = self.copy()
504 ret += other
505 return ret
506
508 if other.__tokdict:
509 offset = len(self.__toklist)
510 addoffset = lambda a: offset if a<0 else a+offset
511 otheritems = other.__tokdict.items()
512 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
513 for (k,vlist) in otheritems for v in vlist]
514 for k,v in otherdictitems:
515 self[k] = v
516 if isinstance(v[0],ParseResults):
517 v[0].__parent = wkref(self)
518
519 self.__toklist += other.__toklist
520 self.__accumNames.update( other.__accumNames )
521 return self
522
524 if isinstance(other,int) and other == 0:
525
526 return self.copy()
527 else:
528
529 return other + self
530
532 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
533
535 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
536
538 out = []
539 for item in self.__toklist:
540 if out and sep:
541 out.append(sep)
542 if isinstance( item, ParseResults ):
543 out += item._asStringList()
544 else:
545 out.append( _ustr(item) )
546 return out
547
549 """Returns the parse results as a nested list of matching tokens; nested C{ParseResults} are converted to lists and all other tokens are returned unchanged."""
550 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
551
553 """Returns the named parse results as a nested dictionary."""
554 if PY_3:
555 item_fn = self.items
556 else:
557 item_fn = self.iteritems
558
559 def toItem(obj):
560 if isinstance(obj, ParseResults):
561 if obj.haskeys():
562 return obj.asDict()
563 else:
564 return [toItem(v) for v in obj]
565 else:
566 return obj
567
568 return dict((k,toItem(v)) for k,v in item_fn())
569
571 """Returns a new copy of a C{ParseResults} object."""
572 ret = ParseResults( self.__toklist )
573 ret.__tokdict = self.__tokdict.copy()
574 ret.__parent = self.__parent
575 ret.__accumNames.update( self.__accumNames )
576 ret.__name = self.__name
577 return ret
578
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """Render the parse results as an XML string.

    The enclosing tag is C{doctag} if given, else this object's results
    name, else C{ITEM}.  Tokens with a results name are tagged with it;
    unnamed tokens are tagged C{ITEM}, or omitted when C{namedItemsOnly} is
    true.  With C{formatted} false no newlines or indentation are emitted.
    """
    # map token position -> results name
    namedItems = {}
    for tagname, entries in self.__tokdict.items():
        for entry in entries:
            namedItems[entry[1]] = tagname

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        nl = indent = nextLevelIndent = ""

    selfTag = doctag if doctag is not None else self.__name
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    pieces = [nl, indent, "<", selfTag, ">"]
    childNamedOnly = namedItemsOnly and doctag is None

    for pos, tok in enumerate(self.__toklist):
        tag = namedItems.get(pos)
        if isinstance(tok, ParseResults):
            pieces.append(tok.asXML(tag, childNamedOnly, nextLevelIndent, formatted))
            continue

        # plain token: use its results name when it has one
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        pieces.extend([nl, nextLevelIndent, "<", tag, ">",
                       _xml_escape(_ustr(tok)),
                       "</", tag, ">"])

    pieces.extend([nl, indent, "</", selfTag, ">"])
    return "".join(pieces)
637
639 for k,vlist in self.__tokdict.items():
640 for v,loc in vlist:
641 if sub is v:
642 return k
643 return None
644
def getName(self):
    """Returns the results name for this token expression.

    Resolution order: this object's own name; else the name under which the
    parent (if still alive) holds this object; else, when there is exactly
    one token and exactly one named entry whose recorded position is 0 or
    -1, that entry's name.  Returns C{None} when no name can be determined.
    """
    if self.__name:
        return self.__name
    elif self.__parent:
        # __parent is a weak reference; calling it yields None once the
        # parent has been garbage collected
        par = self.__parent()
        if par:
            return par.__lookup(self)
        else:
            return None
    elif (len(self) == 1 and
           len(self.__tokdict) == 1 and
           # dict views cannot be indexed on Python 3, so take the first
           # (and only) item through an iterator instead of values()[0]
           next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
        return next(iter(self.__tokdict.keys()))
    else:
        return None
661
def dump(self, indent='', depth=0):
    """Build a diagnostic listing of this C{ParseResults}.

    The first line is the token list.  If results names are defined, each
    is listed (sorted) on its own line with its value, recursing into
    nested C{ParseResults}.  Otherwise, if any token is itself a
    C{ParseResults}, every token is listed by index.  C{indent} is prefixed
    to each line so the text can be embedded in a nested display.
    """
    pieces = [indent + _ustr(self.asList())]
    if self.haskeys():
        for key, val in sorted(self.items()):
            pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, (' '*depth), key))
            if isinstance(val, ParseResults) and val:
                pieces.append(val.dump(indent, depth+1))
            else:
                pieces.append(_ustr(val))
    elif any(isinstance(tok, ParseResults) for tok in self):
        for idx, tok in enumerate(self):
            if isinstance(tok, ParseResults):
                text = tok.dump(indent, depth+1)
            else:
                text = _ustr(tok)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' '*(depth)), idx,
                                                   indent, (' '*(depth+1)), text))

    return "".join(pieces)
691
def pprint(self, *args, **kwargs):
    """Pretty-print the results, converted with C{asList()}, using the C{pprint} module.
       Any extra positional or keyword arguments are forwarded to
       C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
    nested = self.asList()
    pprint.pprint(nested, *args, **kwargs)
697
698
700 return ( self.__toklist,
701 ( self.__tokdict.copy(),
702 self.__parent is not None and self.__parent() or None,
703 self.__accumNames,
704 self.__name ) )
705
707 self.__toklist = state[0]
708 (self.__tokdict,
709 par,
710 inAccumNames,
711 self.__name) = state[1]
712 self.__accumNames = {}
713 self.__accumNames.update(inAccumNames)
714 if par is not None:
715 self.__parent = wkref(par)
716 else:
717 self.__parent = None
718
720 return self.__toklist, self.__name, self.__asList, self.__modal
721
723 return (dir(type(self)) + list(self.keys()))
724
# Register ParseResults as a virtual MutableMapping subclass.  The ABC lives in
# collections.abc on Python 3 (the collections.MutableMapping alias was removed
# in Python 3.10); fall back to the old location on Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
726
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A C{loc} that points directly at a newline character is reported as
   column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when there is no earlier newline, making columns 1-based
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
739
741 """Returns current line number within a string, counting newlines as line separators.
742 The first line is number 1.
743
744 Note: the default parsing behavior is to expand tabs in the input string
745 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
746 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
747 consistent view of the parsed string, the parse location, and line and column
748 positions within the parsed string.
749 """
750 return strg.count("\n",0,loc) + 1
751
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
761
763 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
764
766 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
767
769 print ("Exception raised:" + _ustr(exc))
770
772 """'Do-nothing' debug action, to suppress debugging output during parsing."""
773 pass
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797 'decorator to trim function calls to match the arity of the target'
799 if func in singleArgBuiltins:
800 return lambda s,l,t: func(t)
801 limit = [0]
802 foundArity = [False]
803
804
805 if system_version[:2] >= (3,5):
806 def extract_stack():
807
808 offset = -3 if system_version == (3,5,0) else -2
809 frame_summary = traceback.extract_stack()[offset]
810 return [(frame_summary.filename, frame_summary.lineno)]
811 def extract_tb(tb):
812 frames = traceback.extract_tb(tb)
813 frame_summary = frames[-1]
814 return [(frame_summary.filename, frame_summary.lineno)]
815 else:
816 extract_stack = traceback.extract_stack
817 extract_tb = traceback.extract_tb
818
819
820
821
822 LINE_DIFF = 6
823
824
825 this_line = extract_stack()[-1]
826 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
827
828 def wrapper(*args):
829 while 1:
830 try:
831 ret = func(*args[limit[0]:])
832 foundArity[0] = True
833 return ret
834 except TypeError:
835
836 if foundArity[0]:
837 raise
838 else:
839 try:
840 tb = sys.exc_info()[-1]
841 if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
842 raise
843 finally:
844 del tb
845
846 if limit[0] <= maxargs:
847 limit[0] += 1
848 continue
849 raise
850
851
852 func_name = "<parse action>"
853 try:
854 func_name = getattr(func, '__name__',
855 getattr(func, '__class__').__name__)
856 except Exception:
857 func_name = str(func)
858 wrapper.__name__ = func_name
859
860 return wrapper
861
863 """Abstract base level parser element class."""
864 DEFAULT_WHITE_CHARS = " \n\t\r"
865 verbose_stacktrace = False
866
867 @staticmethod
872
873 @staticmethod
875 """
876 Set class to be used for inclusion of string literals into a parser.
877 """
878 ParserElement._literalStringClass = cls
879
881 self.parseAction = list()
882 self.failAction = None
883
884 self.strRepr = None
885 self.resultsName = None
886 self.saveAsList = savelist
887 self.skipWhitespace = True
888 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
889 self.copyDefaultWhiteChars = True
890 self.mayReturnEmpty = False
891 self.keepTabs = False
892 self.ignoreExprs = list()
893 self.debug = False
894 self.streamlined = False
895 self.mayIndexError = True
896 self.errmsg = ""
897 self.modalResults = True
898 self.debugActions = ( None, None, None )
899 self.re = None
900 self.callPreparse = True
901 self.callDuringTry = False
902
904 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
905 for the same parsing pattern, using copies of the original parse element."""
906 cpy = copy.copy( self )
907 cpy.parseAction = self.parseAction[:]
908 cpy.ignoreExprs = self.ignoreExprs[:]
909 if self.copyDefaultWhiteChars:
910 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
911 return cpy
912
914 """Define name for this expression, for use in debugging."""
915 self.name = name
916 self.errmsg = "Expected " + self.name
917 if hasattr(self,"exception"):
918 self.exception.msg = self.errmsg
919 return self
920
922 """Define name for referencing matching tokens as a nested attribute
923 of the returned parse results.
924 NOTE: this returns a *copy* of the original C{ParserElement} object;
925 this is so that the client can define a basic element, such as an
926 integer, and reference it in multiple places with different names.
927
928 You can also set results names using the abbreviated syntax,
929 C{expr("name")} in place of C{expr.setResultsName("name")} -
930 see L{I{__call__}<__call__>}.
931 """
932 newself = self.copy()
933 if name.endswith("*"):
934 name = name[:-1]
935 listAllMatches=True
936 newself.resultsName = name
937 newself.modalResults = not listAllMatches
938 return newself
939
941 """Method to invoke the Python pdb debugger when this element is
942 about to be parsed. Set C{breakFlag} to True to enable, False to
943 disable.
944 """
945 if breakFlag:
946 _parseMethod = self._parse
947 def breaker(instring, loc, doActions=True, callPreParse=True):
948 import pdb
949 pdb.set_trace()
950 return _parseMethod( instring, loc, doActions, callPreParse )
951 breaker._originalParseMethod = _parseMethod
952 self._parse = breaker
953 else:
954 if hasattr(self._parse,"_originalParseMethod"):
955 self._parse = self._parse._originalParseMethod
956 return self
957
959 """Define action to perform when successfully matching parse element definition.
960 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
961 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
962 - s = the original string being parsed (see note below)
963 - loc = the location of the matching substring
964 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
965 If the functions in fns modify the tokens, they can return them as the return
966 value from fn, and the modified list of tokens will replace the original.
967 Otherwise, fn does not need to return any value.
968
969 Optional keyword arguments:
970 - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing
971
972 Note: the default parsing behavior is to expand tabs in the input string
973 before starting the parsing process. See L{I{parseString}<parseString>} for more information
974 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
975 consistent view of the parsed string, the parse location, and line and column
976 positions within the parsed string.
977 """
978 self.parseAction = list(map(_trim_arity, list(fns)))
979 self.callDuringTry = kwargs.get("callDuringTry", False)
980 return self
981
983 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
984 self.parseAction += list(map(_trim_arity, list(fns)))
985 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
986 return self
987
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
       L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
       functions passed to C{addCondition} need to return boolean success/fail of the condition.

       Optional keyword arguments:
        - message = define a custom message to be used in the raised exception
        - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
        - callDuringTry = if True, the conditions are also evaluated during lookaheads and alternate testing

       Returns C{self}.
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # Bind each condition in its own scope.  A closure created directly
        # in the loop below would look up the loop variable when called, so
        # every parse action would test only the last function in fns.
        # The arity wrapper is also built once here, not on every call.
        trimmed = _trim_arity(condition)
        def pa(s,l,t):
            if not bool(trimmed(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition_action(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
1006
1008 """Define action to perform if parsing fails at this expression.
1009 Fail action fn is a callable function that takes the arguments
1010 C{fn(s,loc,expr,err)} where:
1011 - s = string being parsed
1012 - loc = location where expression match was attempted and failed
1013 - expr = the parse expression that failed
1014 - err = the exception thrown
1015 The function returns no value. It may throw C{L{ParseFatalException}}
1016 if it is desired to stop parsing immediately."""
1017 self.failAction = fn
1018 return self
1019
1021 exprsFound = True
1022 while exprsFound:
1023 exprsFound = False
1024 for e in self.ignoreExprs:
1025 try:
1026 while 1:
1027 loc,dummy = e._parse( instring, loc )
1028 exprsFound = True
1029 except ParseException:
1030 pass
1031 return loc
1032
1034 if self.ignoreExprs:
1035 loc = self._skipIgnorables( instring, loc )
1036
1037 if self.skipWhitespace:
1038 wt = self.whiteChars
1039 instrlen = len(instring)
1040 while loc < instrlen and instring[loc] in wt:
1041 loc += 1
1042
1043 return loc
1044
1045 - def parseImpl( self, instring, loc, doActions=True ):
1047
1048 - def postParse( self, instring, loc, tokenlist ):
1050
1051
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element against C{instring} at C{loc}, without memoization.

    Runs C{preParse} (when both C{callPreParse} and C{self.callPreparse} are
    true), then C{parseImpl} and C{postParse}, wraps the tokens in a
    C{ParseResults}, and applies the parse actions when C{doActions} or
    C{self.callDuringTry} is true.

    Returns a C{(loc, retTokens)} tuple.  An C{IndexError} raised by
    C{parseImpl} is converted to a C{ParseException} located at the end of
    the input, except on the fast path described below.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # slow path: exceptions must be intercepted so that the debug
        # "exception" action and the fail action can be notified

        # debugActions[0] is invoked before the match attempt
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:

            # debugActions[2] is invoked when the match attempt raises
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # the IndexError guard is skipped only when the element declares
        # mayIndexError false and loc is still inside the string
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves retTokens as it is;
                    # any other return value replaces the results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:

                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception hook
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:

        # debugActions[1] is invoked after a successful match
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1123
1129
1131 try:
1132 self.tryParse(instring, loc)
1133 except (ParseException, IndexError):
1134 return False
1135 else:
1136 return True
1137
1138
1139
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache}, keyed on the
    expression and every call argument. Both successful results and
    C{ParseBaseException} failures are cached; a cached failure is re-raised.
    """
    key = (self, instring, loc, callPreParse, doActions)
    if key in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[ key ]
        if isinstance(cached, Exception):
            raise cached
        # return a copy of the tokens so the caller cannot alter the cached entry
        return (cached[0], cached[1].copy())

    try:
        result = self._parseNoCache( instring, loc, doActions, callPreParse )
        # cache a copy, since the caller receives (and may modify) the original
        ParserElement._exprArgCache[ key ] = (result[0], result[1].copy())
    except ParseBaseException as err:
        # clear the traceback before keeping the exception in the cache
        err.__traceback__ = None
        ParserElement._exprArgCache[ key ] = err
        raise
    return result
1156
1157 _parse = _parseNoCache
1158
1159
1160 _exprArgCache = {}
1161 @staticmethod
1164
1165 _packratEnabled = False
1166 @staticmethod
1168 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1169 Repeated parse attempts at the same string location (which happens
1170 often in many complex grammars) can immediately return a cached value,
1171 instead of re-executing parsing/validating code. Memoizing is done of
1172 both valid results and parsing exceptions.
1173
1174 This speedup may break existing programs that use parse actions that
1175 have side-effects. For this reason, packrat parsing is disabled when
1176 you first import pyparsing. To activate the packrat feature, your
1177 program must call the class method C{ParserElement.enablePackrat()}. If
1178 your program uses C{psyco} to "compile as you go", you must call
1179 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1180 Python will crash. For best results, call C{enablePackrat()} immediately
1181 after importing pyparsing.
1182 """
1183 if not ParserElement._packratEnabled:
1184 ParserElement._packratEnabled = True
1185 ParserElement._parse = ParserElement._parseCache
1186
1188 """Execute the parse expression with the given string.
1189 This is the main interface to the client code, once the complete
1190 expression has been built.
1191
1192 If you want the grammar to require that the entire input string be
1193 successfully parsed, then set C{parseAll} to True (equivalent to ending
1194 the grammar with C{L{StringEnd()}}).
1195
1196 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1197 in order to report proper column numbers in parse actions.
1198 If the input string contains tabs and
1199 the grammar uses parse actions that use the C{loc} argument to index into the
1200 string being parsed, you can ensure you have a consistent view of the input
1201 string by:
1202 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1203 (see L{I{parseWithTabs}<parseWithTabs>})
1204 - define your parse action using the full C{(s,loc,toks)} signature, and
1205 reference the input string using the parse action's C{s} argument
1206 - explicitly expand the tabs in your input string before calling
1207 C{parseString}
1208 """
1209 ParserElement.resetCache()
1210 if not self.streamlined:
1211 self.streamline()
1212
1213 for e in self.ignoreExprs:
1214 e.streamline()
1215 if not self.keepTabs:
1216 instring = instring.expandtabs()
1217 try:
1218 loc, tokens = self._parse( instring, 0 )
1219 if parseAll:
1220 loc = self.preParse( instring, loc )
1221 se = Empty() + StringEnd()
1222 se._parse( instring, loc )
1223 except ParseBaseException as exc:
1224 if ParserElement.verbose_stacktrace:
1225 raise
1226 else:
1227
1228 raise exc
1229 else:
1230 return tokens
1231
1233 """Scan the input string for expression matches. Each match will return the
1234 matching tokens, start location, and end location. May be called with optional
1235 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1236 C{overlap} is specified, then overlapping matches will be reported.
1237
1238 Note that the start and end locations are reported relative to the string
1239 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1240 strings with embedded tabs."""
1241 if not self.streamlined:
1242 self.streamline()
1243 for e in self.ignoreExprs:
1244 e.streamline()
1245
1246 if not self.keepTabs:
1247 instring = _ustr(instring).expandtabs()
1248 instrlen = len(instring)
1249 loc = 0
1250 preparseFn = self.preParse
1251 parseFn = self._parse
1252 ParserElement.resetCache()
1253 matches = 0
1254 try:
1255 while loc <= instrlen and matches < maxMatches:
1256 try:
1257 preloc = preparseFn( instring, loc )
1258 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1259 except ParseException:
1260 loc = preloc+1
1261 else:
1262 if nextLoc > loc:
1263 matches += 1
1264 yield tokens, preloc, nextLoc
1265 if overlap:
1266 nextloc = preparseFn( instring, loc )
1267 if nextloc > loc:
1268 loc = nextLoc
1269 else:
1270 loc += 1
1271 else:
1272 loc = nextLoc
1273 else:
1274 loc = preloc+1
1275 except ParseBaseException as exc:
1276 if ParserElement.verbose_stacktrace:
1277 raise
1278 else:
1279
1280 raise exc
1281
1314
1316 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1317 to match the given parse expression. May be called with optional
1318 C{maxMatches} argument, to clip searching after 'n' matches are found.
1319 """
1320 try:
1321 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1322 except ParseBaseException as exc:
1323 if ParserElement.verbose_stacktrace:
1324 raise
1325 else:
1326
1327 raise exc
1328
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text before each match, optionally the first token of the match
    itself, and finally the text remaining after the last match.
    """
    # NOTE(review): the start/end offsets come from scanString, which expands
    # tabs in its own copy of the input unless keepTabs is set, while the slices
    # below are taken from the caller's string - confirm behavior for input
    # containing tabs.
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1343
1345 """Implementation of + operator - returns C{L{And}}"""
1346 if isinstance( other, basestring ):
1347 other = ParserElement._literalStringClass( other )
1348 if not isinstance( other, ParserElement ):
1349 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1350 SyntaxWarning, stacklevel=2)
1351 return None
1352 return And( [ self, other ] )
1353
1355 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1356 if isinstance( other, basestring ):
1357 other = ParserElement._literalStringClass( other )
1358 if not isinstance( other, ParserElement ):
1359 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1360 SyntaxWarning, stacklevel=2)
1361 return None
1362 return other + self
1363
1365 """Implementation of - operator, returns C{L{And}} with error stop"""
1366 if isinstance( other, basestring ):
1367 other = ParserElement._literalStringClass( other )
1368 if not isinstance( other, ParserElement ):
1369 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1370 SyntaxWarning, stacklevel=2)
1371 return None
1372 return And( [ self, And._ErrorStop(), other ] )
1373
1375 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1376 if isinstance( other, basestring ):
1377 other = ParserElement._literalStringClass( other )
1378 if not isinstance( other, ParserElement ):
1379 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1380 SyntaxWarning, stacklevel=2)
1381 return None
1382 return other - self
1383
1385 """Implementation of * operator, allows use of C{expr * 3} in place of
1386 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1387 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1388 may also include C{None} as in:
1389 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1390 to C{expr*n + L{ZeroOrMore}(expr)}
1391 (read as "at least n instances of C{expr}")
1392 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1393 (read as "0 to n instances of C{expr}")
1394 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1395 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1396
1397 Note that C{expr*(None,n)} does not raise an exception if
1398 more than n exprs exist in the input stream; that is,
1399 C{expr*(None,n)} does not enforce a maximum number of expr
1400 occurrences. If this behavior is desired, then write
1401 C{expr*(None,n) + ~expr}
1402
1403 """
1404 if isinstance(other,int):
1405 minElements, optElements = other,0
1406 elif isinstance(other,tuple):
1407 other = (other + (None, None))[:2]
1408 if other[0] is None:
1409 other = (0, other[1])
1410 if isinstance(other[0],int) and other[1] is None:
1411 if other[0] == 0:
1412 return ZeroOrMore(self)
1413 if other[0] == 1:
1414 return OneOrMore(self)
1415 else:
1416 return self*other[0] + ZeroOrMore(self)
1417 elif isinstance(other[0],int) and isinstance(other[1],int):
1418 minElements, optElements = other
1419 optElements -= minElements
1420 else:
1421 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1422 else:
1423 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1424
1425 if minElements < 0:
1426 raise ValueError("cannot multiply ParserElement by negative value")
1427 if optElements < 0:
1428 raise ValueError("second tuple value must be greater or equal to first tuple value")
1429 if minElements == optElements == 0:
1430 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1431
1432 if (optElements):
1433 def makeOptionalList(n):
1434 if n>1:
1435 return Optional(self + makeOptionalList(n-1))
1436 else:
1437 return Optional(self)
1438 if minElements:
1439 if minElements == 1:
1440 ret = self + makeOptionalList(optElements)
1441 else:
1442 ret = And([self]*minElements) + makeOptionalList(optElements)
1443 else:
1444 ret = makeOptionalList(optElements)
1445 else:
1446 if minElements == 1:
1447 ret = self
1448 else:
1449 ret = And([self]*minElements)
1450 return ret
1451
1454
1456 """Implementation of | operator - returns C{L{MatchFirst}}"""
1457 if isinstance( other, basestring ):
1458 other = ParserElement._literalStringClass( other )
1459 if not isinstance( other, ParserElement ):
1460 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1461 SyntaxWarning, stacklevel=2)
1462 return None
1463 return MatchFirst( [ self, other ] )
1464
1466 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1467 if isinstance( other, basestring ):
1468 other = ParserElement._literalStringClass( other )
1469 if not isinstance( other, ParserElement ):
1470 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1471 SyntaxWarning, stacklevel=2)
1472 return None
1473 return other | self
1474
1476 """Implementation of ^ operator - returns C{L{Or}}"""
1477 if isinstance( other, basestring ):
1478 other = ParserElement._literalStringClass( other )
1479 if not isinstance( other, ParserElement ):
1480 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1481 SyntaxWarning, stacklevel=2)
1482 return None
1483 return Or( [ self, other ] )
1484
1486 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1487 if isinstance( other, basestring ):
1488 other = ParserElement._literalStringClass( other )
1489 if not isinstance( other, ParserElement ):
1490 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1491 SyntaxWarning, stacklevel=2)
1492 return None
1493 return other ^ self
1494
1496 """Implementation of & operator - returns C{L{Each}}"""
1497 if isinstance( other, basestring ):
1498 other = ParserElement._literalStringClass( other )
1499 if not isinstance( other, ParserElement ):
1500 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1501 SyntaxWarning, stacklevel=2)
1502 return None
1503 return Each( [ self, other ] )
1504
1506 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1507 if isinstance( other, basestring ):
1508 other = ParserElement._literalStringClass( other )
1509 if not isinstance( other, ParserElement ):
1510 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1511 SyntaxWarning, stacklevel=2)
1512 return None
1513 return other & self
1514
1516 """Implementation of ~ operator - returns C{L{NotAny}}"""
1517 return NotAny( self )
1518
1520 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1521 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1522 could be written as::
1523 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1524
1525 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1526 passed as C{True}.
1527
1528 If C{name} is omitted, same as calling C{L{copy}}.
1529 """
1530 if name is not None:
1531 return self.setResultsName(name)
1532 else:
1533 return self.copy()
1534
1536 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1537 cluttering up returned output.
1538 """
1539 return Suppress( self )
1540
1542 """Disables the skipping of whitespace before matching the characters in the
1543 C{ParserElement}'s defined pattern. This is normally only used internally by
1544 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1545 """
1546 self.skipWhitespace = False
1547 return self
1548
1550 """Overrides the default whitespace chars
1551 """
1552 self.skipWhitespace = True
1553 self.whiteChars = chars
1554 self.copyDefaultWhiteChars = False
1555 return self
1556
1558 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1559 Must be called before C{parseString} when the input grammar contains elements that
1560 match C{<TAB>} characters."""
1561 self.keepTabs = True
1562 return self
1563
1565 """Define expression to be ignored (e.g., comments) while doing pattern
1566 matching; may be called repeatedly, to define multiple comment or other
1567 ignorable patterns.
1568 """
1569 if isinstance(other, basestring):
1570 other = Suppress(other)
1571
1572 if isinstance( other, Suppress ):
1573 if other not in self.ignoreExprs:
1574 self.ignoreExprs.append(other)
1575 else:
1576 self.ignoreExprs.append( Suppress( other.copy() ) )
1577 return self
1578
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Turn on debugging for this expression using the given callbacks.

    Any callback passed as a false value (such as C{None}) is replaced by the
    corresponding module default. Returns C{self}.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1586
1588 """Enable display of debugging messages while doing pattern matching.
1589 Set C{flag} to True to enable, False to disable."""
1590 if flag:
1591 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1592 else:
1593 self.debug = False
1594 return self
1595
1598
1601
1603 self.streamlined = True
1604 self.strRepr = None
1605 return self
1606
1609
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is not read by this implementation; its default is
    C{None} rather than a list literal so that no mutable object is shared
    between calls.
    """
    self.checkRecursion( [] )
1613
def parseFile( self, file_or_filename, parseAll=False ):
    """Parse the contents of a file with this expression.

    C{file_or_filename} may be an object with a C{read()} method, or a
    filename; a filename is opened in text mode, read completely, and closed
    before parsing starts. C{parseAll} is forwarded to C{L{parseString}}.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read() - treat the argument as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise by name rather than with a bare raise
            raise exc
        raise
1632
1634 if isinstance(other, ParserElement):
1635 return self is other or vars(self) == vars(other)
1636 elif isinstance(other, basestring):
1637 return self.matches(other)
1638 else:
1639 return super(ParserElement,self)==other
1640
1642 return not (self == other)
1643
1645 return hash(id(self))
1646
1648 return self == other
1649
1651 return not (self == other)
1652
def matches(self, testString, parseAll=True):
    """Quick boolean check of this parser against a test string. Handy for
    inline microtests of sub expressions while a larger parser is being built::

        expr = Word(nums)
        assert expr.matches("100")

    Parameters:
     - testString - string to test against this expression
     - parseAll - (default=True) - flag passed to C{L{parseString}}

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    return True
1669
def runTests(self, tests, parseAll=True, comment='#', printResults=True, failureTests=False):
    """Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=True) - flag to pass to C{L{parseString}} when running tests
     - comment - (default='#') - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - printResults - (default=True) prints test output to stdout
     - failureTests - (default=False) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    (test string, result) pairs, where result is the parsed results or the exception raised
    """
    if isinstance(tests, basestring):
        # call strip() on each line instead of mapping the unbound str.strip,
        # which rejects unicode objects on Python 2
        tests = [t.strip() for t in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines that follow a comment, are held back
        # and shown as a header above the next real test
        if (comment is not None and comment.matches(t, False)) or (comments and not t):
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump())
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            # point a caret at the failure location
            if '\n' in t:
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe

        if printResults:
            out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
1724
1725
1726 -class Token(ParserElement):
1727 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1730
1731
1732 -class Empty(Token):
1733 """An empty token, will always match."""
1735 super(Empty,self).__init__()
1736 self.name = "Empty"
1737 self.mayReturnEmpty = True
1738 self.mayIndexError = False
1739
1742 """A token that will never match."""
1744 super(NoMatch,self).__init__()
1745 self.name = "NoMatch"
1746 self.mayReturnEmpty = True
1747 self.mayIndexError = False
1748 self.errmsg = "Unmatchable token"
1749
1750 - def parseImpl( self, instring, loc, doActions=True ):
1752
1755 """Token to exactly match a specified string."""
1757 super(Literal,self).__init__()
1758 self.match = matchString
1759 self.matchLen = len(matchString)
1760 try:
1761 self.firstMatchChar = matchString[0]
1762 except IndexError:
1763 warnings.warn("null string passed to Literal; use Empty() instead",
1764 SyntaxWarning, stacklevel=2)
1765 self.__class__ = Empty
1766 self.name = '"%s"' % _ustr(self.match)
1767 self.errmsg = "Expected " + self.name
1768 self.mayReturnEmpty = False
1769 self.mayIndexError = False
1770
1771
1772
1773
1774
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal string at C{loc}.

    Returns C{(loc + matchLen, match)} on success and raises
    C{ParseException} otherwise.
    """
    # cheap first-character test before the full comparison
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.match
1780 _L = Literal
1781 ParserElement._literalStringClass = Literal
1784 """Token to exactly match a specified string as a keyword, that is, it must be
1785 immediately followed by a non-keyword character. Compare with C{L{Literal}}:
1786 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1787 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1788 Accepts two optional constructor arguments in addition to the keyword string:
1789 - C{identChars} is a string of characters that would be valid identifier characters,
1790 defaulting to all alphanumerics + "_" and "$"
1791 - C{caseless} allows case-insensitive matching, default is C{False}.
1792 """
1793 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1794
1796 super(Keyword,self).__init__()
1797 self.match = matchString
1798 self.matchLen = len(matchString)
1799 try:
1800 self.firstMatchChar = matchString[0]
1801 except IndexError:
1802 warnings.warn("null string passed to Keyword; use Empty() instead",
1803 SyntaxWarning, stacklevel=2)
1804 self.name = '"%s"' % self.match
1805 self.errmsg = "Expected " + self.name
1806 self.mayReturnEmpty = False
1807 self.mayIndexError = False
1808 self.caseless = caseless
1809 if caseless:
1810 self.caselessmatch = matchString.upper()
1811 identChars = identChars.upper()
1812 self.identChars = set(identChars)
1813
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring that it is not adjacent to an
    identifier character on either side.

    Returns C{(loc + matchLen, match)} on success and raises
    C{ParseException} otherwise. In caseless mode the comparisons are made
    on upper-cased text.
    """
    end = loc + self.matchLen
    identChars = self.identChars
    if self.caseless:
        found = (
            instring[ loc:end ].upper() == self.caselessmatch
            and (loc >= len(instring)-self.matchLen or instring[end].upper() not in identChars)
            and (loc == 0 or instring[loc-1].upper() not in identChars)
        )
    else:
        found = (
            instring[loc] == self.firstMatchChar
            and (self.matchLen == 1 or instring.startswith(self.match, loc))
            and (loc >= len(instring)-self.matchLen or instring[end] not in identChars)
            and (loc == 0 or instring[loc-1] not in identChars)
        )
    if found:
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1827
1832
1833 @staticmethod
1838
1840 """Token to match a specified string, ignoring case of letters.
1841 Note: the matched results will always be in the case of the given
1842 match string, NOT the case of the input text.
1843 """
1845 super(CaselessLiteral,self).__init__( matchString.upper() )
1846
1847 self.returnString = matchString
1848 self.name = "'%s'" % self.returnString
1849 self.errmsg = "Expected " + self.name
1850
def parseImpl( self, instring, loc, doActions=True ):
    """Compare the upper-cased input slice at C{loc} with C{self.match}.

    Returns C{(loc + matchLen, returnString)} on success and raises
    C{ParseException} otherwise.
    """
    end = loc + self.matchLen
    if instring[ loc:end ].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return end, self.returnString
1855
1859
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc} without regard to case, rejecting a match
    that is immediately followed by an identifier character.

    Returns C{(loc + matchLen, match)} on success and raises
    C{ParseException} otherwise.
    """
    end = loc + self.matchLen
    text_matches = instring[ loc:end ].upper() == self.caselessmatch
    if text_matches and (loc >= len(instring)-self.matchLen or
                         instring[end].upper() not in self.identChars):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1865
1867 """Token for matching words composed of allowed character sets.
1868 Defined with string containing all allowed initial characters,
1869 an optional string containing allowed body characters (if omitted,
1870 defaults to the initial character set), and an optional minimum,
1871 maximum, and/or exact length. The default value for C{min} is 1 (a
1872 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1873 are 0, meaning no maximum or exact length restriction. An optional
1874 C{excludeChars} parameter can list characters that might be found in
1875 the input C{bodyChars} string; useful to define a word of all printables
1876 except for one or two characters, for instance.
1877 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Define a word by its allowed initial and body characters.

    Parameters:
     - initChars - characters allowed as the first character
     - bodyChars - characters allowed after the first (default: same as C{initChars})
     - min - minimum length (default=1; values < 1 raise C{ValueError})
     - max - maximum length (default=0, meaning no maximum)
     - exact - exact length (default=0, meaning not specified); overrides C{min} and C{max}
     - asKeyword - if C{True}, the word must not adjoin other body characters
     - excludeChars - characters to remove from C{initChars} and C{bodyChars}
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # with default length limits and no space among the characters, the word
    # can be matched by an equivalent compiled regular expression
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: without a compiled pattern, parseImpl scans character
            # by character; KeyboardInterrupt/SystemExit are no longer swallowed
            self.re = None
1931
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc}.

    Uses the compiled regular expression when one is set; otherwise scans
    character by character, applying the length and keyword constraints.
    Returns C{(end location, matched text)} and raises C{ParseException} on
    failure.
    """
    if self.re:
        found = self.re.match(instring,loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    body = self.bodyChars
    limit = min( begin + self.maxLen, total )
    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    # a further body character directly after the match breaks both an
    # explicit maximum length and the keyword constraint
    more_follows = loc < total and instring[loc] in body
    rejected = (
        loc - begin < self.minLen
        or (self.maxSpecified and more_follows)
        or (self.asKeyword and
            ((begin > 0 and instring[begin-1] in body) or more_follows))
    )
    if rejected:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
1966
1968 try:
1969 return super(Word,self).__str__()
1970 except:
1971 pass
1972
1973
1974 if self.strRepr is None:
1975
1976 def charsAsStr(s):
1977 if len(s)>4:
1978 return s[:4]+"..."
1979 else:
1980 return s
1981
1982 if ( self.initCharsOrig != self.bodyCharsOrig ):
1983 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1984 else:
1985 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1986
1987 return self.strRepr
1988
1989
1990 -class Regex(Token):
1991 """Token for matching strings that match a given regular expression.
1992 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1993 """
1994 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object, which
    is then used directly. Any other type raises C{ValueError}; a pattern string
    that fails to compile re-raises the C{re} error after issuing a warning.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # keep the pattern's source text; str() of a compiled pattern is the
        # repr of the object, not the expression
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2028
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regular expression at C{loc}.

    Returns C{(end location, ParseResults)}, where the results hold the full
    matched text and every named group of the match is also stored under its
    group name. Raises C{ParseException} if the expression does not match.
    """
    found = self.re.match(instring,loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    ret = ParseResults(found.group())
    for groupName, groupText in found.groupdict().items():
        ret[groupName] = groupText
    return found.end(), ret
2041
2043 try:
2044 return super(Regex,self).__str__()
2045 except:
2046 pass
2047
2048 if self.strRepr is None:
2049 self.strRepr = "Re:(%s)" % repr(self.pattern)
2050
2051 return self.strRepr
2052
2055 """Token for matching strings that are delimited by quoting characters.
2056 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    r"""Build the matcher for a quote-delimited string.

    Parameters:
     - quoteChar - string of one or more characters that opens the quoted string
     - escChar - character used to escape quotes, typically backslash (default=None)
     - escQuote - special sequence standing for an embedded quote (such as SQL's "" for an embedded ") (default=None)
     - multiline - whether the quoted string may span multiple lines (default=C{False})
     - unquoteResults - whether the matched text is returned without its quotes (default=C{True})
     - endQuoteChar - string of one or more characters that closes the quoted string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    An empty (or all-whitespace) C{quoteChar} or C{endQuoteChar} issues a
    warning and raises C{SyntaxError}.
    """
    super(QuotedString,self).__init__()

    # delimiters are used with surrounding whitespace stripped
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # opening quote, then a group whose first alternative is any character
    # other than the first end-quote character, the escape character and
    # (single-line mode only) line breaks
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        opening = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        opening = r'%s(?:[^%s\n\r%s]'
    self.pattern = opening % (
        re.escape(self.quoteChar),
        _escapeRegexRangeChars(self.endQuoteChar[0]),
        _escapeRegexRangeChars(escChar) if escChar is not None else '',
    )

    if len(self.endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the
        # body, as long as the next character does not continue the end quote
        partials = [
            "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                         _escapeRegexRangeChars(self.endQuoteChar[i]))
            for i in range(len(self.endQuoteChar)-1,0,-1)
        ]
        self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise
    else:
        self.reString = self.pattern

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2130
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at C{loc} and return C{(end_loc, text)}.

    When C{self.unquoteResults} is set, the delimiters are removed and, for
    string results, escaped whitespace, C{escChar} sequences and C{escQuote}
    sequences are converted, in that order.  Raises C{ParseException} when
    no quoted string starts at C{loc}.
    """
    # cheap first-character test before running the regular expression
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring, loc)
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:
        # strip off the opening and closing delimiters
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret, basestring):
            # replace escaped whitespace literals with the real characters
            if '\\' in ret and self.convertWhitespaceEscapes:
                for wslit, wschar in ( (r'\t', '\t'), (r'\n', '\n'),
                                       (r'\f', '\f'), (r'\r', '\r') ):
                    ret = ret.replace(wslit, wschar)

            # replace escChar+<char> with <char>; the replacement must be a
            # raw string, "\g" is not a valid escape in a normal literal
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace the escaped-quote sequence with the closing delimiter
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2165
2167 try:
2168 return super(QuotedString,self).__str__()
2169 except:
2170 pass
2171
2172 if self.strRepr is None:
2173 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2174
2175 return self.strRepr
2176
2179 """Token for matching words composed of characters *not* in a given set.
2180 Defined with string containing all disallowed characters, and an optional
2181 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2182 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2183 are 0, meaning no maximum or exact length restriction.
2184 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Configure the set of disallowed characters and the length limits.

    C{max} and C{exact} values of 0 (or less) mean "no restriction"; a
    positive C{exact} overrides both C{min} and C{max}.  Raises
    C{ValueError} when C{min} is less than 1.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2208
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters that are not in C{self.notChars}.

    Returns C{(end_loc, matched_text)}.  Raises C{ParseException} when the
    character at C{loc} is disallowed or the run is shorter than
    C{self.minLen}.
    """
    forbidden = self.notChars
    if instring[loc] in forbidden:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    # never scan past maxLen characters or the end of the input
    limit = min( start+self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] not in forbidden:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2225
2227 try:
2228 return super(CharsNotIn, self).__str__()
2229 except:
2230 pass
2231
2232 if self.strRepr is None:
2233 if len(self.notChars) > 4:
2234 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2235 else:
2236 self.strRepr = "!W:(%s)" % self.notChars
2237
2238 return self.strRepr
2239
2241 """Special matching class for matching whitespace. Normally, whitespace is ignored
2242 by pyparsing grammars. This class is included when some whitespace structures
2243 are significant. Define with a string containing the whitespace characters to be
2244 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2245 as defined for the C{L{Word}} class."""
2246 whiteStrs = {
2247 " " : "<SPC>",
2248 "\t": "<TAB>",
2249 "\n": "<LF>",
2250 "\r": "<CR>",
2251 "\f": "<FF>",
2252 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Configure which whitespace characters are matched and how many.

    The characters in C{ws} are removed from this element's skippable
    whitespace so that they can be matched explicitly.  C{max} of 0 means
    no upper limit; a positive C{exact} overrides both C{min} and C{max}.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # keep skipping only the whitespace characters we are not matching
    still_skipped = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars( "".join(still_skipped) )

    self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
2272
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters drawn from C{self.matchWhite}.

    Returns C{(end_loc, matched_text)}.  Raises C{ParseException} when the
    character at C{loc} is not matchable whitespace or the run is shorter
    than C{self.minLen}.
    """
    matchable = self.matchWhite
    if instring[ loc ] not in matchable:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    # stop at maxLen characters or the end of the input, whichever is first
    maxloc = min( start + self.maxLen, len(instring) )
    loc += 1
    while loc < maxloc and instring[loc] in matchable:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2287
2291 super(_PositionToken,self).__init__()
2292 self.name=self.__class__.__name__
2293 self.mayReturnEmpty = True
2294 self.mayIndexError = False
2295
2297 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2301
2303 if col(loc,instring) != self.col:
2304 instrlen = len(instring)
2305 if self.ignoreExprs:
2306 loc = self._skipIgnorables( instring, loc )
2307 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2308 loc += 1
2309 return loc
2310
def parseImpl( self, instring, loc, doActions=True ):
    """Advance to column C{self.col}, returning the text that was skipped.

    Raises C{ParseException} when the current column is already past the
    target column.
    """
    current = col( loc, instring )
    distance = self.col - current
    if distance < 0:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + distance
    return target, instring[ loc: target ]
2318
2320 """Matches if current position is at the beginning of a line within the parse string"""
2325
2327 preloc = super(LineStart,self).preParse(instring,loc)
2328 if instring[preloc] == "\n":
2329 loc += 1
2330 return loc
2331
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list when C{loc} is at a line start.

    A line start is position 0, the position C{self.preParse} reaches from
    position 0, or any position directly after a newline character.
    Raises C{ParseException} otherwise.
    """
    at_line_start = (
        loc == 0
        or loc == self.preParse( instring, 0 )
        or instring[loc-1] == "\n"
    )
    if at_line_start:
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2338
2340 """Matches if current position is at the end of a line within the parse string"""
2345
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the end of the input string.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when C{loc}
    is exactly the end of the input.  Raises C{ParseException} otherwise.
    """
    if loc < len(instring) and instring[loc] == "\n":
        return loc+1, "\n"
    if loc == len(instring):
        return loc+1, []
    raise ParseException(instring, loc, self.errmsg, self)
2356
2358 """Matches if current position is at the beginning of the parse string"""
2362
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list at the start of the input.

    Position 0 always matches; any other position matches only if it is
    where C{self.preParse} ends up when started from position 0.  Raises
    C{ParseException} otherwise.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2369
2371 """Matches if current position is at the end of the parse string"""
2375
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with an empty token list at (or past) the end of the input.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when C{loc} is already beyond it.  Raises
    C{ParseException} when input remains at C{loc}.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc+1, []
    # loc > end is the only remaining case; the former trailing "else"
    # branch could never execute and has been removed
    return loc, []
2385
2387 """Matches if the current position is at the beginning of a Word, and
2388 is not preceded by any character in a given set of C{wordChars}
2389 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2390 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2391 the string being parsed, or at the beginning of a line.
2392 """
2394 super(WordStart,self).__init__()
2395 self.wordChars = set(wordChars)
2396 self.errmsg = "Not at the start of a word"
2397
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with an empty token list at the start of a word.

    Position 0 always matches.  Elsewhere the previous character must not
    be in C{self.wordChars} and the current character must be; otherwise
    C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    word_chars = self.wordChars
    if instring[loc-1] in word_chars or instring[loc] not in word_chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2404
2406 """Matches if the current position is at the end of a Word, and
2407 is not followed by any character in a given set of C{wordChars}
2408 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2409 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2410 the string being parsed, or at the end of a line.
2411 """
2413 super(WordEnd,self).__init__()
2414 self.wordChars = set(wordChars)
2415 self.skipWhitespace = False
2416 self.errmsg = "Not at the end of a word"
2417
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with an empty token list at the end of a word.

    A position at or past the end of the input always matches.  Elsewhere
    the current character must not be in C{self.wordChars} and the previous
    character must be; otherwise C{ParseException} is raised.
    """
    instrlen = len(instring)
    inside_input = instrlen > 0 and loc < instrlen
    if inside_input:
        word_chars = self.wordChars
        if instring[loc] in word_chars or instring[loc-1] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2425
2428 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Normalise C{exprs} into the list C{self.exprs}.

    C{exprs} may be a generator, a single string, a sequence (a sequence
    made up only of strings is converted element-wise with
    C{ParserElement._literalStringClass}), any other iterable, or a single
    non-iterable object, which is wrapped in a one-element list.
    """
    # collections.Sequence was removed in Python 3.10; the ABC lives in
    # collections.abc on Python 3 and in collections on Python 2
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ ParserElement._literalStringClass( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # a sequence consisting only of strings is converted to literals
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(ParserElement._literalStringClass, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2447
2449 return self.exprs[i]
2450
2452 self.exprs.append( other )
2453 self.strRepr = None
2454 return self
2455
2457 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2458 all contained expressions."""
2459 self.skipWhitespace = False
2460 self.exprs = [ e.copy() for e in self.exprs ]
2461 for e in self.exprs:
2462 e.leaveWhitespace()
2463 return self
2464
2466 if isinstance( other, Suppress ):
2467 if other not in self.ignoreExprs:
2468 super( ParseExpression, self).ignore( other )
2469 for e in self.exprs:
2470 e.ignore( self.ignoreExprs[-1] )
2471 else:
2472 super( ParseExpression, self).ignore( other )
2473 for e in self.exprs:
2474 e.ignore( self.ignoreExprs[-1] )
2475 return self
2476
2478 try:
2479 return super(ParseExpression,self).__str__()
2480 except:
2481 pass
2482
2483 if self.strRepr is None:
2484 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2485 return self.strRepr
2486
2488 super(ParseExpression,self).streamline()
2489
2490 for e in self.exprs:
2491 e.streamline()
2492
2493
2494
2495
2496 if ( len(self.exprs) == 2 ):
2497 other = self.exprs[0]
2498 if ( isinstance( other, self.__class__ ) and
2499 not(other.parseAction) and
2500 other.resultsName is None and
2501 not other.debug ):
2502 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2503 self.strRepr = None
2504 self.mayReturnEmpty |= other.mayReturnEmpty
2505 self.mayIndexError |= other.mayIndexError
2506
2507 other = self.exprs[-1]
2508 if ( isinstance( other, self.__class__ ) and
2509 not(other.parseAction) and
2510 other.resultsName is None and
2511 not other.debug ):
2512 self.exprs = self.exprs[:-1] + other.exprs[:]
2513 self.strRepr = None
2514 self.mayReturnEmpty |= other.mayReturnEmpty
2515 self.mayIndexError |= other.mayIndexError
2516
2517 self.errmsg = "Expected " + _ustr(self)
2518
2519 return self
2520
2524
def validate( self, validateTrace=None ):
    """Validate every contained expression, then check this one for recursion.

    C{validateTrace} is the list of elements already being validated; it is
    copied, never modified.  Omitting it (or passing C{None}) starts a new
    trace.
    """
    # None replaces the former mutable [] default
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion( [] )
2530
2535
2536 -class And(ParseExpression):
2537 """Requires all given C{ParseExpression}s to be found in the given order.
2538 Expressions may be separated by whitespace.
2539 May be constructed using the C{'+'} operator.
2540 May also be constructed using the C{'-'} operator, which will suppress backtracking.
2541 """
2542
2548
def __init__( self, exprs, savelist = True ):
    """Create a sequence of expressions that must all match, in order.

    Whitespace handling is copied from the first contained expression.  An
    empty expression list is accepted (the whitespace settings are then
    left as set by the base class), in line with how C{Or} and
    C{MatchFirst} treat an empty list.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # guard against an empty list, which used to raise IndexError here
    if self.exprs:
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2555
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order and concatenate the tokens.

    After an C{And._ErrorStop} marker has been passed, a failure of any
    later expression is raised as C{ParseSyntaxException} instead of an
    ordinary parse failure.
    """
    # the first element is parsed with callPreParse=False
    loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    past_error_stop = False
    for e in self.exprs[1:]:
        if isinstance(e, And._ErrorStop):
            # marker only: nothing to parse, but failures become syntax errors
            past_error_stop = True
            continue
        if not past_error_stop:
            loc, exprtokens = e._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = e._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        # keep results that carry tokens or named results
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
2580
2582 if isinstance( other, basestring ):
2583 other = ParserElement._literalStringClass( other )
2584 return self.append( other )
2585
2587 subRecCheckList = parseElementList[:] + [ self ]
2588 for e in self.exprs:
2589 e.checkRecursion( subRecCheckList )
2590 if not e.mayReturnEmpty:
2591 break
2592
2594 if hasattr(self,"name"):
2595 return self.name
2596
2597 if self.strRepr is None:
2598 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2599
2600 return self.strRepr
2601
2602
2603 -class Or(ParseExpression):
2604 """Requires that at least one C{ParseExpression} is found.
2605 If two expressions match, the expression that matches the longest string will be used.
2606 May be constructed using the C{'^'} operator.
2607 """
def __init__( self, exprs, savelist = False ):
    """Create an alternation of expressions.

    The alternation may return empty if any alternative may; an alternation
    without alternatives is marked as possibly empty.
    """
    super(Or,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2614
def parseImpl( self, instring, loc, doActions=True ):
    """Try all alternatives and parse with the one that matches the most.

    Each alternative is first tried with C{tryParse}; the successful ones
    are then parsed for real, longest trial match first.  If none succeeds,
    the failure that got furthest into the input is raised, with its
    message replaced by this expression's message.
    """
    furthest_loc = -1
    furthest_exc = None
    candidates = []
    for e in self.exprs:
        try:
            trial_end = e.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_exc = ParseException(instring,len(instring),e.errmsg,self)
                furthest_loc = len(instring)
        else:
            # remember how far this alternative got
            candidates.append((trial_end, e))

    # longest trial match first; the stable sort keeps declaration order
    # among alternatives of equal length
    candidates.sort(key=lambda m: -m[0])
    for _, e in candidates:
        try:
            return e._parse( instring, loc, doActions )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
2651
2652
2654 if isinstance( other, basestring ):
2655 other = ParserElement._literalStringClass( other )
2656 return self.append( other )
2657
2659 if hasattr(self,"name"):
2660 return self.name
2661
2662 if self.strRepr is None:
2663 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2664
2665 return self.strRepr
2666
2668 subRecCheckList = parseElementList[:] + [ self ]
2669 for e in self.exprs:
2670 e.checkRecursion( subRecCheckList )
2671
2674 """Requires that at least one C{ParseExpression} is found.
2675 If two expressions match, the first one listed is the one that will match.
2676 May be constructed using the C{'|'} operator.
2677 """
def __init__( self, exprs, savelist = False ):
    """Create a first-match alternation of expressions.

    The alternation may return empty if any alternative may; an alternation
    without alternatives is marked as possibly empty.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2684
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses successfully.

    If every alternative fails, the failure that got furthest into the
    input is raised, with its message replaced by this expression's
    message; with no alternatives at all a new C{ParseException} is raised.
    """
    furthest_loc = -1
    furthest_exc = None
    for e in self.exprs:
        try:
            return e._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_exc = ParseException(instring,len(instring),e.errmsg,self)
                furthest_loc = len(instring)

    # only reached when no alternative matched
    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
2708
2710 if isinstance( other, basestring ):
2711 other = ParserElement._literalStringClass( other )
2712 return self.append( other )
2713
2715 if hasattr(self,"name"):
2716 return self.name
2717
2718 if self.strRepr is None:
2719 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2720
2721 return self.strRepr
2722
2724 subRecCheckList = parseElementList[:] + [ self ]
2725 for e in self.exprs:
2726 e.checkRecursion( subRecCheckList )
2727
2728
2729 -class Each(ParseExpression):
2730 """Requires all given C{ParseExpression}s to be found, but in any order.
2731 Expressions may be separated by whitespace.
2732 May be constructed using the C{'&'} operator.
2733 """
def __init__( self, exprs, savelist = True ):
    """Create an any-order group of expressions.

    The grouping of sub-expressions used by C{parseImpl} is built lazily;
    C{initExprGroups} records that this has not been done yet.
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
2739
def parseImpl( self, instring, loc, doActions=True ):
    """Match all required sub-expressions, in whatever order they appear.

    A first pass uses C{tryParse} repeatedly to discover the order in which
    the sub-expressions match.  A second pass parses them for real in that
    order and merges the results; a results name found in more than one
    result keeps the combined values.  Raises C{ParseException} when a
    required sub-expression never matched.
    """
    if self.initExprGroups:
        # one-time classification of the sub-expressions, cached on self
        self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
        unwrapped_optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        empty_capable = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional) ]
        self.optionals = unwrapped_optionals + empty_capable
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    scan_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    match_order = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pending_required + pending_optional + self.multioptionals + self.multirequired
        failure_count = 0
        for e in candidates:
            try:
                scan_loc = e.tryParse( instring, scan_loc )
            except ParseException:
                failure_count += 1
            else:
                # record the Optional wrapper when e is the inner expression of one
                match_order.append(self.opt1map.get(id(e),e))
                if e in pending_required:
                    pending_required.remove(e)
                elif e in pending_optional:
                    pending_optional.remove(e)
        if failure_count == len(candidates):
            break

    if pending_required:
        missing = ", ".join(_ustr(e) for e in pending_required)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # append the Optionals whose inner expression never matched, so that
    # they are still parsed in the second pass
    match_order += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pending_optional]

    # second pass: parse for real, in the discovered order
    parsed = []
    for e in match_order:
        loc, results = e._parse(instring,loc,doActions)
        parsed.append(results)

    finalResults = ParseResults()
    for r in parsed:
        # names already present are combined rather than overwritten
        dups = {}
        for k in r.keys():
            if k in finalResults:
                combined = ParseResults(finalResults[k])
                combined += ParseResults(r[k])
                dups[k] = combined
        finalResults += ParseResults(r)
        for k, v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2798
2800 if hasattr(self,"name"):
2801 return self.name
2802
2803 if self.strRepr is None:
2804 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2805
2806 return self.strRepr
2807
2809 subRecCheckList = parseElementList[:] + [ self ]
2810 for e in self.exprs:
2811 e.checkRecursion( subRecCheckList )
2812
2815 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression, copying its parsing attributes.

    A string C{expr} is converted with C{ParserElement._literalStringClass}.
    C{expr} may be C{None}, in which case no attributes are copied.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = ParserElement._literalStringClass(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return
    # mirror the wrapped expression's parsing attributes
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2830
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    Returns whatever C{self.expr._parse} returns.  Raises C{ParseException}
    when no expression has been set.
    """
    if self.expr is None:
        # pass the real input string (previously "") so that the exception
        # is built from the text being parsed
        raise ParseException(instring, loc, self.errmsg, self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2836
2838 self.skipWhitespace = False
2839 self.expr = self.expr.copy()
2840 if self.expr is not None:
2841 self.expr.leaveWhitespace()
2842 return self
2843
2845 if isinstance( other, Suppress ):
2846 if other not in self.ignoreExprs:
2847 super( ParseElementEnhance, self).ignore( other )
2848 if self.expr is not None:
2849 self.expr.ignore( self.ignoreExprs[-1] )
2850 else:
2851 super( ParseElementEnhance, self).ignore( other )
2852 if self.expr is not None:
2853 self.expr.ignore( self.ignoreExprs[-1] )
2854 return self
2855
2861
2863 if self in parseElementList:
2864 raise RecursiveGrammarException( parseElementList+[self] )
2865 subRecCheckList = parseElementList[:] + [ self ]
2866 if self.expr is not None:
2867 self.expr.checkRecursion( subRecCheckList )
2868
def validate( self, validateTrace=None ):
    """Validate the wrapped expression, then check this one for recursion.

    C{validateTrace} is the list of elements already being validated; it is
    copied, never modified.  Omitting it (or passing C{None}) starts a new
    trace.
    """
    # None replaces the former mutable [] default
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion( [] )
2874
2876 try:
2877 return super(ParseElementEnhance,self).__str__()
2878 except:
2879 pass
2880
2881 if self.strRepr is None and self.expr is not None:
2882 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2883 return self.strRepr
2884
2887 """Lookahead matching of the given parse expression. C{FollowedBy}
2888 does *not* advance the parsing position within the input string, it only
2889 verifies that the specified parse expression matches at the current
2890 position. C{FollowedBy} always returns a null token list."""
2894
def parseImpl( self, instring, loc, doActions=True ):
    """Check the lookahead expression without consuming any input.

    The position returned by C{self.expr.tryParse} is discarded; the result
    is always the unchanged C{loc} and an empty token list.  Any exception
    raised by C{tryParse} propagates to the caller.
    """
    lookahead = self.expr
    lookahead.tryParse( instring, loc )
    no_tokens = []
    return loc, no_tokens
2898
2899
2900 -class NotAny(ParseElementEnhance):
2901 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2902 does *not* advance the parsing position within the input string, it only
2903 verifies that the specified parse expression does *not* match at the current
2904 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2905 always returns a null token list. May be constructed using the '~' operator."""
2907 super(NotAny,self).__init__(expr)
2908
2909 self.skipWhitespace = False
2910 self.mayReturnEmpty = True
2911 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2912
2913 - def parseImpl( self, instring, loc, doActions=True ):
2917
2919 if hasattr(self,"name"):
2920 return self.name
2921
2922 if self.strRepr is None:
2923 self.strRepr = "~{" + _ustr(self.expr) + "}"
2924
2925 return self.strRepr
2926
2929 """Repetition of one or more of the given expression.
2930
2931 Parameters:
2932 - expr - expression that must match one or more times
2933 - stopOn - (default=None) - expression for a terminating sentinel
2934 (only required if the sentinel would ordinarily match the repetition
2935 expression)
2936 """
def __init__( self, expr, stopOn=None):
    """Create a one-or-more repetition of C{expr}.

    C{stopOn}, when given, is stored in inverted form (C{~stopOn}) as
    C{self.not_ender}; a string C{stopOn} is first converted with
    C{ParserElement._literalStringClass}.
    """
    super(OneOrMore, self).__init__(expr)
    sentinel = stopOn
    if isinstance(sentinel, basestring):
        sentinel = ParserElement._literalStringClass(sentinel)
    if sentinel is None:
        self.not_ender = None
    else:
        self.not_ender = ~sentinel
2943
def parseImpl( self, instring, loc, doActions=True ):
    """Match the repeated expression once, then as many more times as possible.

    The first match is mandatory, so its failure propagates.  Later
    repetitions stop at the first C{ParseException} or C{IndexError}.  When
    a stop-on sentinel is configured, C{self.not_ender} is tried before
    every repetition, the first one included.
    """
    parse_expr = self.expr._parse
    skip_ignorables = self._skipIgnorables
    not_ender = self.not_ender
    try_not_ender = not_ender.tryParse if not_ender is not None else None

    # must be at least one; the sentinel check comes first
    if try_not_ender is not None:
        try_not_ender(instring, loc)
    loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )
    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if try_not_ender is not None:
                try_not_ender(instring, loc)
            preloc = skip_ignorables( instring, loc ) if has_ignorables else loc
            loc, tmptokens = parse_expr( instring, preloc, doActions )
            if tmptokens or tmptokens.haskeys():
                tokens += tmptokens
    except (ParseException,IndexError):
        # no further repetition: keep what has been collected so far
        pass

    return loc, tokens
2972
2974 if hasattr(self,"name"):
2975 return self.name
2976
2977 if self.strRepr is None:
2978 self.strRepr = "{" + _ustr(self.expr) + "}..."
2979
2980 return self.strRepr
2981
2986
2988 """Optional repetition of zero or more of the given expression.
2989
2990 Parameters:
2991 - expr - expression that must match zero or more times
2992 - stopOn - (default=None) - expression for a terminating sentinel
2993 (only required if the sentinel would ordinarily match the repetition
2994 expression)
2995 """
2996 - def __init__( self, expr, stopOn=None):
2999
3000 - def parseImpl( self, instring, loc, doActions=True ):
3005
3007 if hasattr(self,"name"):
3008 return self.name
3009
3010 if self.strRepr is None:
3011 self.strRepr = "[" + _ustr(self.expr) + "]..."
3012
3013 return self.strRepr
3014
3021
3022 _optionalNotMatched = _NullToken()
3024 """Optional matching of the given expression.
3025
3026 Parameters:
3027 - expr - expression that may match zero or one time
3028 - default (optional) - value to be returned if the optional expression
3029 is not found.
3030 """
3032 super(Optional,self).__init__( expr, savelist=False )
3033 self.defaultValue = default
3034 self.mayReturnEmpty = True
3035
def parseImpl( self, instring, loc, doActions=True ):
    """Parse the wrapped expression, falling back to the default on failure.

    On C{ParseException} or C{IndexError} the position is left unchanged
    and the tokens are the default value (also stored under the wrapped
    expression's results name, if it has one), or an empty list when no
    default was given.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        default = self.defaultValue
        if default is _optionalNotMatched:
            tokens = []
        elif not self.expr.resultsName:
            tokens = [ default ]
        else:
            tokens = ParseResults([ default ])
            tokens[self.expr.resultsName] = default
    return loc, tokens
3049
3051 if hasattr(self,"name"):
3052 return self.name
3053
3054 if self.strRepr is None:
3055 self.strRepr = "[" + _ustr(self.expr) + "]"
3056
3057 return self.strRepr
3058
3059 -class SkipTo(ParseElementEnhance):
3060 """Token for skipping over all undefined text until the matched expression is found.
3061
3062 Parameters:
3063 - expr - target expression marking the end of the data to be skipped
3064 - include - (default=False) if True, the target expression is also parsed
3065 (the skipped text and target expression are returned as a 2-element list).
3066 - ignore - (default=None) used to define grammars (typically quoted strings and
3067 comments) that might contain false matches to the target expression
3068 - failOn - (default=None) define expressions that are not allowed to be
3069 included in the skipped text; if found before the target expression is found,
3070 the SkipTo is not a match
3071 """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Create a skip-to element targeting the expression C{other}.

    Parameters:
     - other - target expression marking the end of the skipped text
     - include - whether the target expression is parsed as well (default=False)
     - ignore - expression to step over while scanning (default=None)
     - failOn - expression that stops the scan where it matches; a string
       is converted with C{ParserElement._literalStringClass} (default=None)
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.includeMatch = include
    self.asList = False
    self.mayReturnEmpty = True
    self.mayIndexError = False
    if not isinstance(failOn, basestring):
        self.failOn = failOn
    else:
        self.failOn = ParserElement._literalStringClass(failOn)
    self.errmsg = "No match found for "+_ustr(self.expr)
3084
def parseImpl( self, instring, loc, doActions=True ):
    """Scan forward from C{loc} until the target expression matches.

    Returns the end position and a C{ParseResults} holding the skipped
    text, followed by the target's tokens when C{self.includeMatch} is set.
    Raises C{ParseException} when the scan runs past the end of the input
    without a match.
    """
    startloc = loc
    instrlen = len(instring)
    expr_parse = self.expr._parse
    fail_on_matches = self.failOn.canParseNext if self.failOn is not None else None
    try_ignore = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

    tmploc = loc
    while tmploc <= instrlen:
        # NOTE(review): a failOn match leaves the loop through "break", which
        # also skips the "else" clause below, so the text skipped so far is
        # returned instead of an exception - confirm this is intended
        if fail_on_matches is not None and fail_on_matches(instring, tmploc):
            break

        if try_ignore is not None:
            # step over as many consecutive ignorable matches as possible
            while True:
                try:
                    tmploc = try_ignore(instring, tmploc)
                except ParseBaseException:
                    break

        try:
            expr_parse(instring, tmploc, doActions=False, callPreParse=False)
        except (ParseException, IndexError):
            # no match here, try the next position
            tmploc += 1
        else:
            # target expression matched at tmploc
            break

    else:
        # scanned past the end of the input without finding the target
        raise ParseException(instring, loc, self.errmsg, self)

    # build up the return values
    loc = tmploc
    skipresult = ParseResults(instring[startloc:loc])

    if self.includeMatch:
        loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
        skipresult += mat

    return loc, skipresult
3131
3132 -class Forward(ParseElementEnhance):
3133 """Forward declaration of an expression to be defined later -
3134 used for recursive grammars, such as algebraic infix notation.
3135 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3136
3137 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3138 Specifically, '|' has a lower precedence than '<<', so that::
3139 fwdExpr << a | b | c
3140 will actually be evaluated as::
3141 (fwdExpr << a) | b | c
3142 thereby leaving b and c out as parseable alternatives. It is recommended that you
3143 explicitly group the values inserted into the C{Forward}::
3144 fwdExpr << (a | b | c)
3145 Converting to use the '<<=' operator instead will avoid this problem.
3146 """
3149
3151 if isinstance( other, basestring ):
3152 other = ParserElement._literalStringClass(other)
3153 self.expr = other
3154 self.strRepr = None
3155 self.mayIndexError = self.expr.mayIndexError
3156 self.mayReturnEmpty = self.expr.mayReturnEmpty
3157 self.setWhitespaceChars( self.expr.whiteChars )
3158 self.skipWhitespace = self.expr.skipWhitespace
3159 self.saveAsList = self.expr.saveAsList
3160 self.ignoreExprs.extend(self.expr.ignoreExprs)
3161 return self
3162
3164 return self << other
3165
3167 self.skipWhitespace = False
3168 return self
3169
3171 if not self.streamlined:
3172 self.streamlined = True
3173 if self.expr is not None:
3174 self.expr.streamline()
3175 return self
3176
def validate( self, validateTrace=None ):
    """Validate the wrapped expression, then run C{checkRecursion} on this element.

    Parameters:
     - validateTrace - list of elements already visited on the way down to this
       one (default=None, meaning an empty trace); it is never modified, a copy
       extended with C{self} is what gets passed on to the wrapped expression
    """
    # A fresh list per call replaces the former shared mutable default ([]).
    if validateTrace is None:
        validateTrace = []
    # Only descend when this element is not already on the trace; that is what
    # stops the walk from looping forever on a recursive grammar.
    if self not in validateTrace:
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
    self.checkRecursion([])
3183
3185 if hasattr(self,"name"):
3186 return self.name
3187 return self.__class__.__name__ + ": ..."
3188
3189
3190 self._revertClass = self.__class__
3191 self.__class__ = _ForwardNoRecurse
3192 try:
3193 if self.expr is not None:
3194 retString = _ustr(self.expr)
3195 else:
3196 retString = "None"
3197 finally:
3198 self.__class__ = self._revertClass
3199 return self.__class__.__name__ + ": " + retString
3200
3202 if self.expr is not None:
3203 return super(Forward,self).copy()
3204 else:
3205 ret = Forward()
3206 ret <<= self
3207 return ret
3208
3212
3214 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
3215 - def __init__( self, expr, savelist=False ):
3218
3220 """Converter to concatenate all matching tokens to a single string.
3221 By default, the matching patterns must also be contiguous in the input string;
3222 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3223 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its matched tokens are joined into one string.

    Parameters:
     - expr - the expression whose tokens are to be combined
     - joinString - string stored for use when the tokens are joined (default="")
     - adjacent - if true, C{leaveWhitespace()} is applied so that the
       contained expressions do not skip whitespace (default=True)
    """
    super(Combine,self).__init__( expr )
    self.joinString = joinString
    self.adjacent = adjacent
    self.callPreparse = True
    if adjacent:
        # turn off whitespace skipping for the contained expressions ...
        self.leaveWhitespace()
    # ... but always leave it switched on for the Combine itself; this must
    # come after leaveWhitespace()
    self.skipWhitespace = True
3233
3240
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single joined string token.

    The result starts as a copy of C{tokenlist} with its list contents
    cleared, to which one string built from
    C{tokenlist._asStringList(self.joinString)} is added.  When this
    expression has a results name and the result carries keys, the result is
    returned wrapped in a list.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))

    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    if not (self.resultsName and combined.haskeys()):
        return combined
    return [ combined ]
3250
3251 -class Group(TokenConverter):
3252 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3254 super(Group,self).__init__( expr )
3255 self.saveAsList = True
3256
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
3259
3260 -class Dict(TokenConverter):
3261 """Converter to return a repetitive expression as a list, but also as a dictionary.
3262 Each element can also be referenced using the first token in the expression as its key.
3263 Useful for tabular report scraping when the first column can be used as a item key.
3264 """
3266 super(Dict,self).__init__( expr )
3267 self.saveAsList = True
3268
def postParse( self, instring, loc, tokenlist ):
    """Register every non-empty entry of C{tokenlist} under its first token.

    The key is the entry's first token (integer keys are converted to a
    stripped string).  The stored value depends on what follows the key:
     - nothing: an empty string
     - exactly one token that is not a C{ParseResults}: that token
     - otherwise: a copy of the entry without its key, unwrapped to its
       single element when it has exactly one element and no keys
    Returns C{tokenlist}, wrapped in a list when a results name is set.
    """
    for position, entry in enumerate(tokenlist):
        entrySize = len(entry)
        if entrySize == 0:
            continue

        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()

        if entrySize == 1:
            value = ""
        elif entrySize == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            keepWhole = len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.haskeys())
            value = remainder if keepWhole else remainder[0]

        tokenlist[key] = _ParseResultsWithOffset(value,position)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3292
3295 """Converter for ignoring the results of a parsed expression."""
3296 - def postParse( self, instring, loc, tokenlist ):
3298
3301
3304 """Wrapper for parse actions, to ensure they are only called once."""
3306 self.callable = _trim_arity(methodCall)
3307 self.called = False
3309 if not self.called:
3310 results = self.callable(s,l,t)
3311 self.called = True
3312 return results
3313 raise ParseException(s,l,"")
3316
3318 """Decorator for debugging parse actions."""
3319 f = _trim_arity(f)
3320 def z(*paArgs):
3321 thisFunc = f.__name__
3322 s,l,t = paArgs[-3:]
3323 if len(paArgs)>3:
3324 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3325 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3326 try:
3327 ret = f(*paArgs)
3328 except Exception as exc:
3329 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3330 raise
3331 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3332 return ret
3333 try:
3334 z.__name__ = f.__name__
3335 except AttributeError:
3336 pass
3337 return z
3338
3339
3340
3341
def delimitedList( expr, delim=",", combine=False ):
    """Helper to build an expression matching C{expr} repeated with C{delim} between items.

    Parameters:
     - expr - expression for a single list element
     - delim - delimiter string or expression (default=",")
     - combine - if C{True}, the whole match (delimiters included) is wrapped in
       C{L{Combine}} and returned as a single token string; otherwise the
       delimiters are suppressed and the elements are returned as a list of
       tokens (default=False)
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)

    # only the treatment of the delimiter and the outer wrapper differ
    separator = delim if combine else Suppress( delim )
    listExpr = expr + ZeroOrMore( separator + expr )
    if combine:
        listExpr = Combine( listExpr )
    return listExpr.setName(dlName)
3355
3357 """Helper to define a counted list of expressions.
3358 This helper defines a pattern of the form::
3359 integer expr expr expr...
3360 where the leading integer tells how many expr expressions follow.
3361 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3362 """
3363 arrayExpr = Forward()
3364 def countFieldParseAction(s,l,t):
3365 n = t[0]
3366 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3367 return []
3368 if intExpr is None:
3369 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
3370 else:
3371 intExpr = intExpr.copy()
3372 intExpr.setName("arrayLen")
3373 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
3374 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
3375
3377 ret = []
3378 for i in L:
3379 if isinstance(i,list):
3380 ret.extend(_flatten(i))
3381 else:
3382 ret.append(i)
3383 return ret
3384
3386 """Helper to define an expression that is indirectly defined from
3387 the tokens matched in a previous expression, that is, it looks
3388 for a 'repeat' of a previous expression. For example::
3389 first = Word(nums)
3390 second = matchPreviousLiteral(first)
3391 matchExpr = first + ":" + second
3392 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3393 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3394 If this is not desired, use C{matchPreviousExpr}.
3395 Do *not* use with packrat parsing enabled.
3396 """
3397 rep = Forward()
3398 def copyTokenToRepeater(s,l,t):
3399 if t:
3400 if len(t) == 1:
3401 rep << t[0]
3402 else:
3403
3404 tflat = _flatten(t.asList())
3405 rep << And(Literal(tt) for tt in tflat)
3406 else:
3407 rep << Empty()
3408 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3409 rep.setName('(prev) ' + _ustr(expr))
3410 return rep
3411
3413 """Helper to define an expression that is indirectly defined from
3414 the tokens matched in a previous expression, that is, it looks
3415 for a 'repeat' of a previous expression. For example::
3416 first = Word(nums)
3417 second = matchPreviousExpr(first)
3418 matchExpr = first + ":" + second
3419 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3420 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3421 the expressions are evaluated first, and then compared, so
3422 C{"1"} is compared with C{"10"}.
3423 Do *not* use with packrat parsing enabled.
3424 """
3425 rep = Forward()
3426 e2 = expr.copy()
3427 rep <<= e2
3428 def copyTokenToRepeater(s,l,t):
3429 matchTokens = _flatten(t.asList())
3430 def mustMatchTheseTokens(s,l,t):
3431 theseTokens = _flatten(t.asList())
3432 if theseTokens != matchTokens:
3433 raise ParseException("",0,"")
3434 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3435 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3436 rep.setName('(prev) ' + _ustr(expr))
3437 return rep
3438
3440
3441 for c in r"\^-]":
3442 s = s.replace(c,_bslash+c)
3443 s = s.replace("\n",r"\n")
3444 s = s.replace("\t",r"\t")
3445 return _ustr(s)
3446
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
       if creating a C{Regex} raises an exception)

    Returns C{NoMatch()} when no symbols are given; an argument of an
    unsupported type additionally triggers a C{SyntaxWarning}.
    """
    # collections.Sequence was removed in Python 3.10; prefer collections.abc and
    # fall back to the old location for interpreters that predate it.
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates, and move any symbol ahead of an earlier symbol that is a
    # prefix of it, so the longer alternative is always tried first.  Position i
    # is re-examined after every change; it only advances once nothing after it
    # duplicates or is masked by symbols[i].
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # Fall back to MatchFirst below.  Catching Exception rather than using a
            # bare except lets KeyboardInterrupt/SystemExit propagate.
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3510
3512 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3513 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
3514 in the proper order. The key pattern can include delimiting markers or punctuation,
3515 as long as they are suppressed, thereby leaving the significant key text. The value
3516 pattern can include named results, so that the C{Dict} results can include named token
3517 fields.
3518 """
3519 return Dict( ZeroOrMore( Group ( key + value ) ) )
3520
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text.  By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values."""
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            # replace all tokens with the raw slice of the input
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # rewrite the token list in place, removing the two marker names
            # while keeping any other results names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
3545
3547 """Helper to undo pyparsing's default grouping of And expressions, even
3548 if all but one are non-empty."""
3549 return TokenConverter(expr).setParseAction(lambda t:t[0])
3550
3552 """Helper to decorate a returned token with its starting and ending locations in the input string.
3553 This helper adds the following results names:
3554 - locn_start = location where matched expression begins
3555 - locn_end = location where matched expression ends
3556 - value = the actual parsed results
3557
3558 Be careful if the input text contains C{<TAB>} characters, you may want to call
3559 C{L{ParserElement.parseWithTabs}}
3560 """
3561 locator = Empty().setParseAction(lambda s,l,t: l)
3562 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
3563
3564
3565
3566 empty = Empty().setName("empty")
3567 lineStart = LineStart().setName("lineStart")
3568 lineEnd = LineEnd().setName("lineEnd")
3569 stringStart = StringStart().setName("stringStart")
3570 stringEnd = StringEnd().setName("stringEnd")
3571
3572 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Escaped hex character (\x41, \X41 or the legacy \0x41 form).  The digits are
# taken as everything after the 'x': str.lstrip(r'\0x') strips a *set* of
# characters, so it also ate leading zero digits (turning \x00 into '') and
# left an upper-case 'X' in place, and int(..., 16) then failed on both.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
3574 _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
3575 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
3576 _charRange = Group(_singleChar + Suppress("-") + _singleChar)
3577 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3580 r"""Helper to easily define string ranges for use in Word construction. Borrows
3581 syntax from regexp '[]' string range definitions::
3582 srange("[0-9]") -> "0123456789"
3583 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3584 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3585 The input string must be enclosed in []'s, and the returned string is the expanded
3586 character set joined into a single string.
3587 The values enclosed in the []'s may be::
3588 a single character
3589 an escaped character with a leading backslash (such as \- or \])
3590 an escaped hex character with a leading '\x' (\x21, which is a '!' character)
3591 (\0x## is also supported for backwards compatibility)
3592 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3593 a range of any of the above, separated by a dash ('a-z', etc.)
3594 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3595 """
3596 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
3597 try:
3598 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
3599 except:
3600 return ""
3601
3603 """Helper method for defining parse actions that require matching at a specific
3604 column in the input text.
3605 """
3606 def verifyCol(strg,locn,toks):
3607 if col(locn,strg) != n:
3608 raise ParseException(strg,locn,"matched token not at column %d" % n)
3609 return verifyCol
3610
3612 """Helper method for common parse actions that simply return a literal value. Especially
3613 useful when used with C{L{transformString<ParserElement.transformString>}()}.
3614 """
3615 return lambda s,l,t: [replStr]
3616
3618 """Helper parse action for removing quotation marks from parsed quoted strings.
3619 To use, add this parse action to quoted string using::
3620 quotedString.setParseAction( removeQuotes )
3621 """
3622 return t[0][1:-1]
3623
3625 """Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
3626 args are passed, they are forwarded to the given function as additional arguments after
3627 the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
3628 parsed data to an integer using base 16.
3629 """
3630 def pa(s,l,t):
3631 t[:] = [func(tokn, *args) for tokn in t]
3632
3633 try:
3634 func_name = getattr(func, '__name__',
3635 getattr(func, '__class__').__name__)
3636 except Exception:
3637 func_name = str(func)
3638 pa.__name__ = func_name
3639
3640 return pa
3641
3642 upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
3643 """Helper parse action to convert tokens to upper case."""
3644
3645 downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
3646 """Helper parse action to convert tokens to lower case."""
3676
3680
3684
3686 """Helper to create a validating parse action to be used with start tags created
3687 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
3688 with a required attribute value, to avoid false matches on common tags such as
3689 C{<TD>} or C{<DIV>}.
3690
3691 Call C{withAttribute} with a series of attribute names and values. Specify the list
3692 of filter attributes names and values as:
3693 - keyword arguments, as in C{(align="right")}, or
3694 - as an explicit dict with C{**} operator, when an attribute name is also a Python
3695 reserved word, as in C{**{"class":"Customer", "align":"right"}}
3696 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3697 For attribute names with a namespace prefix, you must use the second form. Attribute
3698 names are matched insensitive to upper/lower case.
3699
3700 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
3701
3702 To verify that the attribute exists, but without specifying a value, pass
3703 C{withAttribute.ANY_VALUE} as the value.
3704 """
3705 if args:
3706 attrs = args[:]
3707 else:
3708 attrs = attrDict.items()
3709 attrs = [(k,v) for k,v in attrs]
3710 def pa(s,l,tokens):
3711 for attrName,attrValue in attrs:
3712 if attrName not in tokens:
3713 raise ParseException(s,l,"no matching attribute " + attrName)
3714 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3715 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3716 (attrName, tokens[attrName], attrValue))
3717 return pa
3718 withAttribute.ANY_VALUE = object()
3719
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} for matching on a tag's C{class}
    attribute, which cannot be passed as an ordinary keyword argument because
    C{class} is a reserved word in Python.

    Parameters:
     - classname - required value of the class attribute
     - namespace - optional namespace prefix; when given, the attribute looked
       for is C{"<namespace>:class"} (default='')
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
3726
3727 opAssoc = _Constants()
3728 opAssoc.LEFT = object()
3729 opAssoc.RIGHT = object()
3732 """Helper method for constructing grammars of expressions made up of
3733 operators working in a precedence hierarchy. Operators may be unary or
3734 binary, left- or right-associative. Parse actions can also be attached
3735 to operator expressions.
3736
3737 Parameters:
3738 - baseExpr - expression representing the most basic element for the nested
3739 - opList - list of tuples, one for each operator precedence level in the
3740 expression grammar; each tuple is of the form
3741 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3742 - opExpr is the pyparsing expression for the operator;
3743 may also be a string, which will be converted to a Literal;
3744 if numTerms is 3, opExpr is a tuple of two expressions, for the
3745 two operators separating the 3 terms
3746 - numTerms is the number of terms for this operator (must
3747 be 1, 2, or 3)
3748 - rightLeftAssoc is the indicator whether the operator is
3749 right or left associative, using the pyparsing-defined
3750 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
3751 - parseAction is the parse action to be associated with
3752 expressions matching this operator expression (the
3753 parse action tuple member may be omitted)
3754 - lpar - expression for matching left-parentheses (default=Suppress('('))
3755 - rpar - expression for matching right-parentheses (default=Suppress(')'))
3756 """
3757 ret = Forward()
3758 lastExpr = baseExpr | ( lpar + ret + rpar )
3759 for i,operDef in enumerate(opList):
3760 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3761 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
3762 if arity == 3:
3763 if opExpr is None or len(opExpr) != 2:
3764 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3765 opExpr1, opExpr2 = opExpr
3766 thisExpr = Forward().setName(termName)
3767 if rightLeftAssoc == opAssoc.LEFT:
3768 if arity == 1:
3769 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3770 elif arity == 2:
3771 if opExpr is not None:
3772 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3773 else:
3774 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3775 elif arity == 3:
3776 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3777 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3778 else:
3779 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3780 elif rightLeftAssoc == opAssoc.RIGHT:
3781 if arity == 1:
3782
3783 if not isinstance(opExpr, Optional):
3784 opExpr = Optional(opExpr)
3785 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3786 elif arity == 2:
3787 if opExpr is not None:
3788 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3789 else:
3790 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3791 elif arity == 3:
3792 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3793 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3794 else:
3795 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3796 else:
3797 raise ValueError("operator must indicate right or left associativity")
3798 if pa:
3799 matchExpr.setParseAction( pa )
3800 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
3801 lastExpr = thisExpr
3802 ret <<= lastExpr
3803 return ret
3804
3805 operatorPrecedence = infixNotation
3806 """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
3807
3808 dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
3809 sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
3810 quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
3811 Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
3812 unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
3815 """Helper method for defining nested lists enclosed in opening and closing
3816 delimiters ("(" and ")" are the default).
3817
3818 Parameters:
3819 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3820 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3821 - content - expression for items within the nested lists (default=None)
3822 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3823
3824 If an expression is not provided for the content argument, the nested
3825 expression will capture all whitespace-delimited content between delimiters
3826 as a list of separate values.
3827
3828 Use the C{ignoreExpr} argument to define expressions that may contain
3829 opening or closing characters that should not be treated as opening
3830 or closing characters for nesting, such as quotedString or a comment
3831 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
3832 The default is L{quotedString}, but if no expressions are to be ignored,
3833 then pass C{None} for this argument.
3834 """
3835 if opener == closer:
3836 raise ValueError("opening and closing strings cannot be the same")
3837 if content is None:
3838 if isinstance(opener,basestring) and isinstance(closer,basestring):
3839 if len(opener) == 1 and len(closer)==1:
3840 if ignoreExpr is not None:
3841 content = (Combine(OneOrMore(~ignoreExpr +
3842 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3843 ).setParseAction(lambda t:t[0].strip()))
3844 else:
3845 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3846 ).setParseAction(lambda t:t[0].strip()))
3847 else:
3848 if ignoreExpr is not None:
3849 content = (Combine(OneOrMore(~ignoreExpr +
3850 ~Literal(opener) + ~Literal(closer) +
3851 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3852 ).setParseAction(lambda t:t[0].strip()))
3853 else:
3854 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3855 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3856 ).setParseAction(lambda t:t[0].strip()))
3857 else:
3858 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3859 ret = Forward()
3860 if ignoreExpr is not None:
3861 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3862 else:
3863 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3864 ret.setName('nested %s%s expression' % (opener,closer))
3865 return ret
3866
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single grammar
       should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most statements
       (default=True)

    A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol != expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, each required to sit at the current indent level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )

    # registered only after the block expression has been assembled, as before
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
3918
3919 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
3920 punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
3921
3922 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
3923 _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
3924 commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
3926 """Helper parser action to replace common HTML entities with their special characters"""
3927 return _htmlEntityMap.get(t.entity)
3928
3929
3930 cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
3931 "Comment of the form C{/* ... */}"
3932
3933 htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
3934 "Comment of the form C{<!-- ... -->}"
3935
3936 restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
3937 dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
3938 "Comment of the form C{// ... (to end of line)}"
3939
3940 cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
3941 "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
3942
3943 javaStyleComment = cppStyleComment
3944 "Same as C{L{cppStyleComment}}"
3945
3946 pythonStyleComment = Regex(r"#.*").setName("Python style comment")
3947 "Comment of the form C{# ... (to end of line)}"
3948
3949 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
3950 Optional( Word(" \t") +
3951 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
3952 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3953 """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
3957 """
3958 Here are some common low-level expressions that may be useful in jump-starting parser development:
3959 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
3960 - common L{programming identifiers<identifier>}
3961 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
3962 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
3963 - L{UUID<uuid>}
3964 Parse actions:
3965 - C{L{convertToInteger}}
3966 - C{L{convertToFloat}}
3967 - C{L{convertToDate}}
3968 - C{L{convertToDatetime}}
3969 - C{L{stripHTMLTags}}
3970 """
3971
3972 convertToInteger = tokenMap(int)
3973 """
3974 Parse action for converting parsed integers to Python int
3975 """
3976
3977 convertToFloat = tokenMap(float)
3978 """
3979 Parse action for converting parsed numbers to Python float
3980 """
3981
3982 integer = Word(nums).setName("integer").setParseAction(convertToInteger)
3983 """expression that parses an unsigned integer, returns an int"""
3984
3985 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
3986 """expression that parses a hexadecimal integer, returns an int"""
3987
3988 signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
3989 """expression that parses an integer with optional leading sign, returns an int"""
3990
# Build the numerator and denominator from copies of signedInteger.  addParseAction
# modifies the expression it is called on, so attaching convertToFloat to
# signedInteger itself would make signedInteger - and numeric, which is built
# from it - return floats instead of ints.
fraction = (signedInteger.copy().addParseAction(convertToFloat) + '/' + signedInteger.copy().addParseAction(convertToFloat)).setName("fraction")
"""fractional expression of an integer divided by an integer, returns a float"""
fraction.addParseAction(lambda t: t[0]/t[-1])
3994
mixed_integer = (
    fraction
    | integer + Optional(Optional('-').suppress() + fraction)
).setName("fraction or mixed integer-fraction")
"""either a bare fraction, or an integer optionally followed by a fraction (an optional C{'-'}
between the two is matched and suppressed); the matched values are added together"""
# collapse the matched values into their total
mixed_integer.addParseAction(sum)
3998
real = (
    Regex(r'[+-]?\d+\.\d*')
    .setName("real number")
    .setParseAction(convertToFloat)
)
"""expression for a floating point number (optional sign, digits, a decimal point, optional
further digits); returns a float"""

sciReal = (
    Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)')
    .setName("real number with scientific notation")
    .setParseAction(convertToFloat)
)
"""expression for a floating point number that may use scientific notation; returns a float"""


# alternatives are tried in order: sciReal, then real, then signedInteger
numeric = (sciReal | real | signedInteger).streamline()
"""any numeric expression, returns the corresponding Python type"""

number = (
    Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?')
    .setName("number")
    .setParseAction(convertToFloat)
)
"""any int or real number, always returned as a float"""
4011
identifier = Word(
    alphas+'_',      # characters allowed in the first position
    alphanums+'_',   # characters allowed in the remaining positions
).setName("identifier")
"""typical code identifier: a leading alpha or '_', then 0 or more alphas, nums, or '_'"""
4014
# one octet (0-255) followed by exactly three more ".octet" groups
ipv4_address = Regex(
    r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
).setName("IPv4 address")
"IPv4 address in dotted form (C{0.0.0.0 - 255.255.255.255})"
4017
# a single group of 1 to 4 hex digits
_ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")

# full form: eight groups separated by ':'
_full_ipv6_address = (
    _ipv6_part + (':' + _ipv6_part)*7
).setName("full IPv6 address")

# short form: optional groups on either side of a "::"
_short_ipv6_address = (
    Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
    + "::"
    + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
).setName("short IPv6 address")
# Reject a short form unless fewer than 8 of its tokens are hex groups.  The
# lambda is only called later, during parsing, and reaches _ipv6_part through
# the pyparsing_common name.
_short_ipv6_address.addCondition(
    lambda t: len([tok for tok in t if pyparsing_common._ipv6_part.matches(tok)]) < 8
)

# mixed form: the literal "::ffff:" prefix followed by an IPv4 address
_mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")

ipv6_address = Combine(
    (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
).setName("IPv6 address")
"IPv6 address (long, short, or mixed form)"
4025
# Six pairs of hex digits.  The delimiter after the first pair is captured and
# re-used through the \1 backreference, so it must be the same throughout.
mac_address = Regex(
    r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
).setName("MAC address")
"MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters, used consistently)"
4028
4029 @staticmethod
4031 """
4032 Helper to create a parse action for converting parsed date string to Python datetime.date
4033
4034 Params -
4035 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
4036 """
4037 return lambda s,l,t: datetime.strptime(t[0], fmt).date()
4038
4039 @staticmethod
4041 """
4042 Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
4043
4044 Params -
4045 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
4046 """
4047 return lambda s,l,t: datetime.strptime(t[0], fmt)
4048
iso8601_date = Regex(
    r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
).setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd}); the C{-mm} and C{-dd} parts are optional in the pattern"

iso8601_datetime = Regex(
    r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
).setName("ISO8601 datetime")
"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - seconds, fractional seconds, and timezone are optional; the date and time may be separated by C{'T'} or C{' '}"

uuid = Regex(
    r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
).setName("UUID")
"UUID in hyphenated hex form (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
4057
# Matches either anyOpenTag or anyCloseTag and suppresses the match from the
# results.  NOTE(review): presumably the helper behind stripHTMLTags, which the
# namespace docstring lists -- confirm against that method.
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
4059 @staticmethod
4063
if __name__ == "__main__":
    # Self-test: build a small "select <columns> from <tables>" grammar, run it
    # through runTests(), then exercise a few pyparsing_common expressions.

    # keywords, matched caselessly
    select_kw = CaselessLiteral("select")
    from_kw = CaselessLiteral("from")

    # one component of a (possibly dotted) name
    name_part = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and passed through upcaseTokens
    column_name = delimitedList(name_part, ".", combine=True).setParseAction(upcaseTokens)
    column_list = Group(delimitedList(column_name)).setName("columns")
    column_spec = ('*' | column_list)

    table_name = delimitedList(name_part, ".", combine=True).setParseAction(upcaseTokens)
    table_list = Group(delimitedList(table_name)).setName("tables")

    simple_sql = (
        select_kw("command")
        + column_spec("columns")
        + from_kw
        + table_list("tables")
    )

    simple_sql.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the same sample numbers are fed to both numeric expressions
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    pyparsing_common.numeric.runTests(numeric_samples)
    pyparsing_common.number.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # convert matched UUID strings into uuid.UUID objects for this test run
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
4134