regexp_object.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *  Copyright (C) 2003 Apple Computer, Inc.
00006  *
00007  *  This library is free software; you can redistribute it and/or
00008  *  modify it under the terms of the GNU Lesser General Public
00009  *  License as published by the Free Software Foundation; either
00010  *  version 2 of the License, or (at your option) any later version.
00011  *
00012  *  This library is distributed in the hope that it will be useful,
00013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  *  Lesser General Public License for more details.
00016  *
00017  *  You should have received a copy of the GNU Lesser General Public
00018  *  License along with this library; if not, write to the Free Software
00019  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00020  *
00021  */
00022 
00023 #include <stdio.h>
00024 
00025 #include "value.h"
00026 #include "object.h"
00027 #include "types.h"
00028 #include "interpreter.h"
00029 #include "operations.h"
00030 #include "internal.h"
00031 #include "regexp.h"
00032 #include "regexp_object.h"
00033 #include "error_object.h"
00034 #include "lookup.h"
00035 
00036 using namespace KJS;
00037 
00038 // ------------------------------ RegExpPrototypeImp ---------------------------
00039 
// ECMA 15.10.6
00041 
00042 const ClassInfo RegExpPrototypeImp::info = {"RegExp", 0, 0, 0};
00043 
00044 RegExpPrototypeImp::RegExpPrototypeImp(ExecState *exec,
00045                                        ObjectPrototypeImp *objProto,
00046                                        FunctionPrototypeImp *funcProto)
00047   : ObjectImp(objProto)
00048 {
00049   Value protect(this);
00050   setInternalValue(String(""));
00051 
00052   // The constructor will be added later in RegExpObject's constructor (?)
00053 
00054   static const Identifier execPropertyName("exec");
00055   putDirect(execPropertyName,
00056         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Exec,     0, execPropertyName), DontEnum);
00057   static const Identifier testPropertyName("test");
00058   putDirect(testPropertyName,
00059         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Test,     0, testPropertyName), DontEnum);
00060   putDirect(toStringPropertyName,
00061         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::ToString, 0, toStringPropertyName), DontEnum);
00062 }
00063 
00064 // ------------------------------ RegExpProtoFuncImp ---------------------------
00065 
00066 RegExpProtoFuncImp::RegExpProtoFuncImp(ExecState * /*exec*/, FunctionPrototypeImp *funcProto,
00067                                        int i, int len, const Identifier &_ident)
00068   : InternalFunctionImp(funcProto), id(i)
00069 {
00070   Value protect(this);
00071   putDirect(lengthPropertyName, len, DontDelete|ReadOnly|DontEnum);
00072   ident = _ident;
00073 }
00074 
00075 bool RegExpProtoFuncImp::implementsCall() const
00076 {
00077   return true;
00078 }
00079 
00080 Value RegExpProtoFuncImp::call(ExecState *exec, Object &thisObj, const List &args)
00081 {
00082   if (!thisObj.inherits(&RegExpImp::info)) {
00083     if (thisObj.inherits(&RegExpPrototypeImp::info)) {
00084       switch (id) {
00085         case ToString: return String("//"); // FireFox returns /(?:)/
00086       }
00087     }
00088     Object err = Error::create(exec,TypeError);
00089     exec->setException(err);
00090     return err;
00091   }
00092 
00093   RegExpImp *reimp = static_cast<RegExpImp*>(thisObj.imp());
00094   RegExp *re = reimp->regExp();
00095   String s;
00096   UString str;
00097   switch (id) {
00098   case Exec:      // 15.10.6.2
00099   case Test:
00100   {
00101     s = args[0].toString(exec);
00102     int length = s.value().size();
00103 
00104     // Get values from the last time (in case of /g)
00105     Value lastIndex = thisObj.get(exec,"lastIndex");
00106     int i = lastIndex.isValid() ? lastIndex.toInt32(exec) : 0;
00107     bool globalFlag = thisObj.get(exec,"global").toBoolean(exec);
00108     if (!globalFlag)
00109       i = 0;
00110     if (i < 0 || i > length) {
00111       thisObj.put(exec,"lastIndex", Number(0), DontDelete | DontEnum);
00112       if (id == Test)
00113         return Boolean(false);
00114       else
00115         return Null();
00116     }
00117     RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp().imp());
00118     int **ovector = regExpObj->registerRegexp( re, s.value() );
00119 
00120     str = re->match(s.value(), i, 0L, ovector);
00121     regExpObj->setSubPatterns(re->subPatterns());
00122 
00123     if (id == Test)
00124       return Boolean(!str.isNull());
00125 
00126     if (str.isNull()) // no match
00127     {
00128       if (globalFlag)
00129         thisObj.put(exec,"lastIndex",Number(0), DontDelete | DontEnum);
00130       return Null();
00131     }
00132     else // success
00133     {
00134       if (globalFlag)
00135         thisObj.put(exec,"lastIndex",Number( (*ovector)[1] ), DontDelete | DontEnum);
00136       return regExpObj->arrayOfMatches(exec,str);
00137     }
00138   }
00139   break;
00140   case ToString:
00141     s = thisObj.get(exec,"source").toString(exec);
00142     str = "/";
00143     str += s.value();
00144     str += "/";
00145     if (thisObj.get(exec,"global").toBoolean(exec)) {
00146       str += "g";
00147     }
00148     if (thisObj.get(exec,"ignoreCase").toBoolean(exec)) {
00149       str += "i";
00150     }
00151     if (thisObj.get(exec,"multiline").toBoolean(exec)) {
00152       str += "m";
00153     }
00154     return String(str);
00155   }
00156 
00157   return Undefined();
00158 }
00159 
00160 // ------------------------------ RegExpImp ------------------------------------
00161 
00162 const ClassInfo RegExpImp::info = {"RegExp", 0, 0, 0};
00163 
00164 RegExpImp::RegExpImp(RegExpPrototypeImp *regexpProto)
00165   : ObjectImp(regexpProto), reg(0L)
00166 {
00167 }
00168 
00169 RegExpImp::~RegExpImp()
00170 {
00171   delete reg;
00172 }
00173 
00174 // ------------------------------ RegExpObjectImp ------------------------------
00175 
00176 RegExpObjectImp::RegExpObjectImp(ExecState * /*exec*/,
00177                                  FunctionPrototypeImp *funcProto,
00178                                  RegExpPrototypeImp *regProto)
00179 
00180   : InternalFunctionImp(funcProto), lastOvector(0L), lastNrSubPatterns(0)
00181 {
00182   Value protect(this);
00183   // ECMA 15.10.5.1 RegExp.prototype
00184   putDirect(prototypePropertyName, regProto, DontEnum|DontDelete|ReadOnly);
00185 
00186   // no. of arguments for constructor
00187   putDirect(lengthPropertyName, NumberImp::two(), ReadOnly|DontDelete|DontEnum);
00188 }
00189 
00190 RegExpObjectImp::~RegExpObjectImp()
00191 {
00192   delete [] lastOvector;
00193 }
00194 
00195 int **RegExpObjectImp::registerRegexp( const RegExp* re, const UString& s )
00196 {
00197   lastString = s;
00198   delete [] lastOvector;
00199   lastOvector = 0;
00200   lastNrSubPatterns = re->subPatterns();
00201   return &lastOvector;
00202 }
00203 
00204 Object RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const
00205 {
00206   List list;
00207   // The returned array contains 'result' as first item, followed by the list of matches
00208   list.append(String(result));
00209   if ( lastOvector )
00210     for ( unsigned int i = 1 ; i < lastNrSubPatterns + 1 ; ++i )
00211     {
00212       UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
00213       list.append(String(substring));
00214     }
00215   Object arr = exec->lexicalInterpreter()->builtinArray().construct(exec, list);
00216   arr.put(exec, "index", Number(lastOvector[0]));
00217   arr.put(exec, "input", String(lastString));
00218   return arr;
00219 }
00220 
00221 Value RegExpObjectImp::get(ExecState *exec, const Identifier &p) const
00222 {
00223   UString s = p.ustring();
00224   if (s[0] == '$' && lastOvector)
00225   {
00226     bool ok;
00227     unsigned long i = s.substr(1).toULong(&ok);
00228     if (ok)
00229     {
00230       if (i < lastNrSubPatterns + 1)
00231       {
00232         UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
00233         return String(substring);
00234       }
00235       return String("");
00236     }
00237   }
00238   return InternalFunctionImp::get(exec, p);
00239 }
00240 
00241 bool RegExpObjectImp::implementsConstruct() const
00242 {
00243   return true;
00244 }
00245 
00246 // ECMA 15.10.4
00247 Object RegExpObjectImp::construct(ExecState *exec, const List &args)
00248 {
00249   UString p;
00250   if (args.isEmpty()) {
00251       p = "";
00252   } else {
00253     Value a0 = args[0];
00254     if (a0.isA(ObjectType) && a0.toObject(exec).inherits(&RegExpImp::info)) {
00255       // It's a regexp. Check that no flags were passed.
00256       if (args.size() > 1 && args[1].type() != UndefinedType) {
00257           Object err = Error::create(exec,TypeError);
00258           exec->setException(err);
00259           return err;
00260       }
00261       RegExpImp *rimp = static_cast<RegExpImp*>(Object::dynamicCast(a0).imp());
00262       p = rimp->regExp()->pattern();
00263     } else {
00264       p = a0.toString(exec);
00265     }
00266   }
00267   UString flags = args[1].type() == UndefinedType ? UString("") : args[1].toString(exec);
00268 
00269   RegExpPrototypeImp *proto = static_cast<RegExpPrototypeImp*>(exec->lexicalInterpreter()->builtinRegExpPrototype().imp());
00270   RegExpImp *dat = new RegExpImp(proto);
00271   Object obj(dat); // protect from GC
00272 
00273   bool global = (flags.find("g") >= 0);
00274   bool ignoreCase = (flags.find("i") >= 0);
00275   bool multiline = (flags.find("m") >= 0);
00276   // TODO: throw a syntax error on invalid flags
00277 
00278   dat->putDirect("global", global ? BooleanImp::staticTrue : BooleanImp::staticFalse, DontDelete | ReadOnly | DontEnum);
00279   dat->putDirect("ignoreCase", ignoreCase ? BooleanImp::staticTrue : BooleanImp::staticFalse, DontDelete | ReadOnly | DontEnum);
00280   dat->putDirect("multiline", multiline ? BooleanImp::staticTrue : BooleanImp::staticFalse, DontDelete | ReadOnly | DontEnum);
00281 
00282   dat->putDirect("source", new StringImp(p), DontDelete | ReadOnly | DontEnum);
00283   dat->putDirect("lastIndex", NumberImp::zero(), DontDelete | DontEnum);
00284 
00285   int reflags = RegExp::None;
00286   if (global)
00287       reflags |= RegExp::Global;
00288   if (ignoreCase)
00289       reflags |= RegExp::IgnoreCase;
00290   if (multiline)
00291       reflags |= RegExp::Multiline;
00292   RegExp *re = new RegExp(p, reflags);
00293   if (!re->isValid()) {
00294     Object err = Error::create(exec, SyntaxError,
00295                                "Invalid regular expression");
00296     exec->setException(err);
00297     return err;
00298   }
00299   dat->setRegExp(re);
00300 
00301   return obj;
00302 }
00303 
00304 bool RegExpObjectImp::implementsCall() const
00305 {
00306   return true;
00307 }
00308 
00309 // ECMA 15.10.3
00310 Value RegExpObjectImp::call(ExecState *exec, Object &/*thisObj*/,
00311                 const List &args)
00312 {
00313   // TODO: handle RegExp argument case (15.10.3.1)
00314 
00315   return construct(exec, args);
00316 }
KDE Home | KDE Accessibility Home | Description of Access Keys