00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef REGEX_H
00017 #define REGEX_H
00018
00019
00020
00045 #include "unicode/utypes.h"
00046
00047 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00048
00049 #include "unicode/uobject.h"
00050 #include "unicode/unistr.h"
00051 #include "unicode/parseerr.h"
00052
00053 #include "unicode/uregex.h"
00054
00055 U_NAMESPACE_BEGIN
00056
00057
00058
00059
// Forward declarations.  Within this header these types are only named in
// pointer/reference positions or in friend declarations before their full
// definitions (if any) appear, so incomplete types are sufficient.
class RegexMatcher;
class RegexPattern;
class UVector;
class UVector32;
class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;
class RuleBasedBreakIterator;
class RegexCImpl;
00069
00070
00071
00072
/**
 * Debug-only hook taking a RegexPattern.
 *
 * With REGEX_DEBUG defined this declares a real function.  Otherwise the
 * name is defined as a function-like macro that expands to nothing, so
 * call sites need no #ifdef of their own.
 *
 * @param pat  the pattern to dump (NOTE(review): output format and
 *             destination are defined by the implementation, not here).
 */
#ifdef REGEX_DEBUG
U_INTERNAL void U_EXPORT2
    RegexPatternDump(const RegexPattern *pat);
#else
#define RegexPatternDump(pat)
#endif
00083
00084
00085
00097 class U_I18N_API RegexPattern: public UObject {
00098 public:
00099
00107 RegexPattern();
00108
00115 RegexPattern(const RegexPattern &source);
00116
00122 virtual ~RegexPattern();
00123
00132 UBool operator==(const RegexPattern& that) const;
00133
00142 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);};
00143
00149 RegexPattern &operator =(const RegexPattern &source);
00150
00158 virtual RegexPattern *clone() const;
00159
00160
00185 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00186 UParseError &pe,
00187 UErrorCode &status);
00188
00213 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00214 uint32_t flags,
00215 UParseError &pe,
00216 UErrorCode &status);
00217
00218
00241 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00242 uint32_t flags,
00243 UErrorCode &status);
00244
00245
00251 virtual uint32_t flags() const;
00252
00270 virtual RegexMatcher *matcher(const UnicodeString &input,
00271 UErrorCode &status) const;
00272
00273 private:
00285 RegexMatcher *matcher(const UChar *input,
00286 UErrorCode &status) const;
00287 public:
00288
00289
00301 virtual RegexMatcher *matcher(UErrorCode &status) const;
00302
00303
00318 static UBool U_EXPORT2 matches(const UnicodeString ®ex,
00319 const UnicodeString &input,
00320 UParseError &pe,
00321 UErrorCode &status);
00322
00323
00328 virtual UnicodeString pattern() const;
00329
00330
00356 virtual int32_t split(const UnicodeString &input,
00357 UnicodeString dest[],
00358 int32_t destCapacity,
00359 UErrorCode &status) const;
00360
00361
00367 virtual UClassID getDynamicClassID() const;
00368
00374 static UClassID U_EXPORT2 getStaticClassID();
00375
00376 private:
00377
00378
00379
00380 UnicodeString fPattern;
00381 uint32_t fFlags;
00382
00383 UVector32 *fCompiledPat;
00384 UnicodeString fLiteralText;
00385
00386
00387 UVector *fSets;
00388 Regex8BitSet *fSets8;
00389
00390
00391 UErrorCode fDeferredStatus;
00392
00393
00394 int32_t fMinMatchLen;
00395
00396
00397
00398
00399 int32_t fFrameSize;
00400
00401
00402 int32_t fDataSize;
00403
00404
00405
00406 UVector32 *fGroupMap;
00407
00408
00409 int32_t fMaxCaptureDigits;
00410
00411 UnicodeSet **fStaticSets;
00412
00413
00414 Regex8BitSet *fStaticSets8;
00415
00416
00417 int32_t fStartType;
00418 int32_t fInitialStringIdx;
00419 int32_t fInitialStringLen;
00420 UnicodeSet *fInitialChars;
00421 UChar32 fInitialChar;
00422 Regex8BitSet *fInitialChars8;
00423
00424 friend class RegexCompile;
00425 friend class RegexMatcher;
00426 friend class RegexCImpl;
00427
00428
00429
00430
00431 void init();
00432 void zap();
00433 #ifdef REGEX_DEBUG
00434 void dumpOp(int32_t index) const;
00435 friend void U_EXPORT2 RegexPatternDump(const RegexPattern *);
00436 #endif
00437
00438 };
00439
00440
00441
00451 class U_I18N_API RegexMatcher: public UObject {
00452 public:
00453
00468 RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status);
00469
00491 RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
00492 uint32_t flags, UErrorCode &status);
00493
00494 private:
00506 RegexMatcher(const UnicodeString ®exp, const UChar *input,
00507 uint32_t flags, UErrorCode &status);
00508 public:
00509
00510
00516 virtual ~RegexMatcher();
00517
00518
00525 virtual UBool matches(UErrorCode &status);
00526
00537 virtual UBool matches(int32_t startIndex, UErrorCode &status);
00538
00539
00540
00541
00555 virtual UBool lookingAt(UErrorCode &status);
00556
00557
00571 virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
00572
00585 virtual UBool find();
00586
00587
00597 virtual UBool find(int32_t start, UErrorCode &status);
00598
00599
00609 virtual UnicodeString group(UErrorCode &status) const;
00610
00611
00624 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00625
00626
00632 virtual int32_t groupCount() const;
00633
00634
00642 virtual int32_t start(UErrorCode &status) const;
00643
00644
00658 virtual int32_t start(int32_t group, UErrorCode &status) const;
00659
00660
00670 virtual int32_t end(UErrorCode &status) const;
00671
00672
00686 virtual int32_t end(int32_t group, UErrorCode &status) const;
00687
00688
00697 virtual RegexMatcher &reset();
00698
00699
00715 virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
00716
00717
00731 virtual RegexMatcher &reset(const UnicodeString &input);
00732
00733 private:
00745 RegexMatcher &reset(const UChar *input);
00746 public:
00747
00754 virtual const UnicodeString &input() const;
00755
00756
00757
00776 virtual RegexMatcher ®ion(int32_t start, int32_t limit, UErrorCode &status);
00777
00778
00787 virtual int32_t regionStart() const;
00788
00789
00798 virtual int32_t regionEnd() const;
00799
00808 virtual UBool hasTransparentBounds() const;
00809
00828 virtual RegexMatcher &useTransparentBounds(UBool b);
00829
00830
00838 virtual UBool hasAnchoringBounds() const;
00839
00852 virtual RegexMatcher &useAnchoringBounds(UBool b);
00853
00866 virtual UBool hitEnd() const;
00867
00877 virtual UBool requireEnd() const;
00878
00879
00880
00881
00882
00888 virtual const RegexPattern &pattern() const;
00889
00890
00907 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
00908
00909
00930 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
00931
00959 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
00960 const UnicodeString &replacement, UErrorCode &status);
00961
00962
00973 virtual UnicodeString &appendTail(UnicodeString &dest);
00974
00975
00976
01001 virtual int32_t split(const UnicodeString &input,
01002 UnicodeString dest[],
01003 int32_t destCapacity,
01004 UErrorCode &status);
01005
01027 virtual void setTimeLimit(int32_t limit, UErrorCode &status);
01028
01035 virtual int32_t getTimeLimit() const;
01036
01058 virtual void setStackLimit(int32_t limit, UErrorCode &status);
01059
01067 virtual int32_t getStackLimit() const;
01068
01069
01083 virtual void setMatchCallback(URegexMatchCallback *callback,
01084 const void *context,
01085 UErrorCode &status);
01086
01087
01088
01099 virtual void getMatchCallback(URegexMatchCallback *&callback,
01100 const void *&context,
01101 UErrorCode &status);
01102
01103
01109 void setTrace(UBool state);
01110
01111
01117 static UClassID U_EXPORT2 getStaticClassID();
01118
01124 virtual UClassID getDynamicClassID() const;
01125
01126 private:
01127
01128
01129 RegexMatcher();
01130 RegexMatcher(const RegexPattern *pat);
01131 RegexMatcher(const RegexMatcher &other);
01132 RegexMatcher &operator =(const RegexMatcher &rhs);
01133 void init(UErrorCode &status);
01134 void init2(const UnicodeString &s, UErrorCode &e);
01135
01136 friend class RegexPattern;
01137 friend class RegexCImpl;
01138 public:
01140 void resetPreserveRegion();
01141 private:
01142
01143
01144
01145
01146
01147 void MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
01148 inline void backTrack(int32_t &inputIdx, int32_t &patIdx);
01149 UBool isWordBoundary(int32_t pos);
01150 UBool isUWordBoundary(int32_t pos);
01151 REStackFrame *resetStack();
01152 inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
01153 void IncrementTime(UErrorCode &status);
01154
01155
01156 const RegexPattern *fPattern;
01157 RegexPattern *fPatternOwned;
01158
01159
01160 const UnicodeString *fInput;
01161 int32_t fFrameSize;
01162
01163 int32_t fRegionStart;
01164 int32_t fRegionLimit;
01165
01166 int32_t fAnchorStart;
01167 int32_t fAnchorLimit;
01168
01169 int32_t fLookStart;
01170 int32_t fLookLimit;
01171
01172
01173 int32_t fActiveStart;
01174 int32_t fActiveLimit;
01175
01176
01177
01178 UBool fTransparentBounds;
01179 UBool fAnchoringBounds;
01180
01181 UBool fMatch;
01182 int32_t fMatchStart;
01183 int32_t fMatchEnd;
01184
01185
01186 int32_t fLastMatchEnd;
01187
01188 int32_t fAppendPosition;
01189
01190
01191
01192 UBool fHitEnd;
01193 UBool fRequireEnd;
01194
01195
01196 UVector32 *fStack;
01197 REStackFrame *fFrame;
01198
01199
01200
01201 int32_t *fData;
01202 int32_t fSmallData[8];
01203
01204 int32_t fTimeLimit;
01205
01206
01207 int32_t fTime;
01208 int32_t fTickCounter;
01209
01210
01211
01212
01213 int32_t fStackLimit;
01214
01215
01216 URegexMatchCallback *fCallbackFn;
01217
01218 const void *fCallbackContext;
01219
01220 UBool fTraceDebug;
01221
01222 UErrorCode fDeferredStatus;
01223
01224
01225 RuleBasedBreakIterator *fWordBreakItr;
01226
01227
01228 };
01229
01230 U_NAMESPACE_END
01231 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
01232 #endif