lexer.h

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Library General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Library General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Library General Public License
00017  *  along with this library; see the file COPYING.LIB.  If not, write to
00018  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *  Boston, MA 02110-1301, USA.
00020  *
00021  */
00022 
00023 #ifndef _KJSLEXER_H_
00024 #define _KJSLEXER_H_
00025 
00026 #include "ustring.h"
00027 
00028 
00029 namespace KJS {
00030 
00031   class Identifier;  // forward declaration: Lexer below only handles Identifier through pointers
00032 
00033   class RegExp;  // forward declaration; not referenced anywhere else in this header
00034 
00035   class Lexer {
          // Declares the scanner's interface and state: input is supplied as a
          // UChar buffer through setCode() and scanning is driven through lex().
          // Apart from three inline accessors, everything here is a declaration
          // only; notes marked "presumably"/"TODO confirm" are assumptions that
          // must be checked against the implementation file.
00036   public:
00037     Lexer();
00038     ~Lexer();
          // Static accessor returning a Lexer pointer; which instance it yields
          // is decided by the implementation -- TODO confirm.
00039     static Lexer *curr();
00040 
          // Hands the lexer len UChars starting at c. The pointer is const, so
          // the text cannot be modified through it. Presumably it is kept in the
          // code/length members below rather than copied -- TODO confirm lifetime.
00041     void setCode(const UChar *c, unsigned int len);
          // Runs the scanner. The meaning of the int result (presumably a token
          // code consumed by the parser) is defined by the implementation.
00042     int lex();
00043 
          // Returns the internal yylineno counter plus one, i.e. presumably a
          // 1-based line number derived from a 0-based counter.
00044     int lineNo() const { return yylineno + 1; }
00045 
          // Returns the terminator flag. Presumably true when a line terminator
          // preceded the most recent token -- NOTE(review): confirm.
00046     bool prevTerminator() const { return terminator; }
00047 
          // Scanner states. Note that the enumerator named Identifier hides the
          // class name KJS::Identifier inside Lexer's scope, which is why later
          // declarations in this class spell the type as KJS::Identifier.
00048     enum State { Start,
00049                  IdentifierOrKeyword,
00050                  Identifier,
00051                  InIdentifierOrKeyword,
00052                  InIdentifier,
00053                  InIdentifierUnicodeEscapeStart,
00054                  InIdentifierUnicodeEscape,
00055                  InSingleLineComment,
00056                  InMultiLineComment,
00057                  InNum,
00058                  InNum0,
00059                  InHex,
00060                  InOctal,
00061                  InDecimal,
00062                  InExponentIndicator,
00063                  InExponent,
00064                  Hex,
00065                  Octal,
00066                  Number,
00067                  String,
00068                  Eof,
00069                  InString,
00070                  InEscapeSequence,
00071                  InHexEscape,
00072                  InUnicodeEscape,
00073                  Other,
00074                  Bad };
00075 
          // scanRegExp() reports success/failure as bool. pattern and flags are
          // public UString members; presumably they are filled in by
          // scanRegExp() -- TODO confirm.
00076     bool scanRegExp();
00077     UString pattern, flags;
          // Returns the foundBad flag.
00078     bool hadError() const { return foundBad; }
00079 
          // Character classification predicates. They are static, so they can be
          // called without a Lexer instance; each takes one code unit as
          // unsigned short. Exact character sets are defined by the implementation.
00080     static bool isWhiteSpace(unsigned short c);
00081     static bool isIdentLetter(unsigned short c);
00082     static bool isDecimalDigit(unsigned short c);
00083     static bool isHexDigit(unsigned short c);
00084     static bool isOctalDigit(unsigned short c);
00085 
00086   private:
00087     int yylineno;  // line counter; lineNo() reports this value plus one
00088     bool done;
          // Two buffers, one of char and one of UChar, each paired with a size
          // and a position member. Presumably scratch space written through
          // record8()/record16() -- TODO confirm.
00089     char *buffer8;
00090     UChar *buffer16;
00091     unsigned int size8, size16;
00092     unsigned int pos8, pos16;
00093     bool terminator;  // exposed read-only through prevTerminator()
00094     bool restrKeyword;
00095     // encountered delimiter like "'" and "}" on last run
00096     bool delimited;
00097     bool skipLF;
00098     bool skipCR;
00099     bool convertNextIdentifier;
00100     int stackToken;
00101     int lastToken;
00102     bool foundBad;  // exposed read-only through hadError()
00103 
00104     State state;  // one of the State enumerators declared above
00105     void setDone(State s);
00106     unsigned int pos;
00107     void shift(unsigned int p);
00108     void nextLine();
00109     int lookupKeyword(const char *);
00110 
          // Takes four code units; presumably these are current/next1/next2/next3
          // declared further down -- TODO confirm. Meaning of the int result is
          // defined by the implementation.
00111     int matchPunctuator(unsigned short c1, unsigned short c2,
00112                         unsigned short c3, unsigned short c4);
00113     unsigned short singleEscape(unsigned short c) const;
00114     unsigned short convertOctal(unsigned short c1, unsigned short c2,
00115                                 unsigned short c3) const;
00116   public:
          // Public static conversion helpers, callable without a Lexer instance.
          // Judging by names and arities they presumably turn one, two or four
          // hex-digit code units into a numeric value -- TODO confirm.
00117     static unsigned char convertHex(unsigned short c1);
00118     static unsigned char convertHex(unsigned short c1, unsigned short c2);
00119     static UChar convertUnicode(unsigned short c1, unsigned short c2,
00120                                 unsigned short c3, unsigned short c4);
00121 
          // globalClear() is declared only in builds that define KJS_DEBUG_MEM.
00122 #ifdef KJS_DEBUG_MEM
00123 
00126     static void globalClear();
00127 #endif
00128 
          // NOTE(review): presumably releases per-parse data such as the
          // strings/identifiers arrays below -- confirm in the implementation.
00129     void doneParsing();
00130 
00131   private:
00132 
          // record16 is overloaded for int and for UChar arguments.
00133     void record8(unsigned short c);
00134     void record16(int c);
00135     void record16(UChar c);
00136 
          // Both return raw pointers; who owns the result is not visible in this
          // header. Presumably the objects are tracked in the identifiers/strings
          // arrays below -- TODO confirm.
00137     KJS::Identifier *makeIdentifier(UChar *buffer, unsigned int pos);
00138     UString *makeUString(UChar *buffer, unsigned int pos);
00139 
          // Presumably the input most recently passed to setCode().
00140     const UChar *code;
00141     unsigned int length;
00142     int yycolumn;
          // bol exists only when KJS_PURE_ECMA is NOT defined, so the member list
          // (and therefore the object layout) differs between the two
          // configurations.
00143 #ifndef KJS_PURE_ECMA
00144     int bol;     // begin of line
00145 #endif
00146 
00147     // current and following unicode characters (int to allow for -1 for end-of-file marker)
00148     int current, next1, next2, next3;
00149 
          // Pointer arrays, each paired with a count and a capacity member;
          // presumably growable storage for objects created by makeUString().
00150     UString **strings;
00151     unsigned int numStrings;
00152     unsigned int stringsCapacity;
00153 
          // Same count/capacity arrangement for KJS::Identifier pointers;
          // presumably for objects created by makeIdentifier().
00154     KJS::Identifier **identifiers;
00155     unsigned int numIdentifiers;
00156     unsigned int identifiersCapacity;
00157 
00158     // for future extensions
          // LexerPrivate is only declared here; no definition appears in this
          // header, so only the pointer member contributes to the class layout.
00159     class LexerPrivate;
00160     LexerPrivate *priv;
00161   };
00162 
00163 } // namespace
00164 
00165 #endif
KDE Home | KDE Accessibility Home | Description of Access Keys