00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017
00023 U_NAMESPACE_BEGIN
00024
// Forward declarations. The class below refers to these types only through
// pointers or references (member pointers and function parameters), so their
// full definitions are not needed in this header.
00025 class BMPSet;
00026 class ParsePosition;
00027 class SymbolTable;
00028 class UnicodeSetStringSpan;
00029 class UVector;
00030 class RuleCharacterIterator;
00031
/**
 * UnicodeSet: a UnicodeFilter subclass, exported with U_COMMON_API.
 *
 * Judging from the declarations, an instance keeps an array of UChar32
 * values (list/len/capacity), an optional UVector of strings, and a cached
 * pattern (pat/patLen).
 *
 * NOTE(review): this header only declares the members; apart from the inline
 * functions defined after the class, the descriptions in this class are derived from
 * names and signatures and should be confirmed against the implementation.
 */
00272 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00273
    // Data members. They appear before any access specifier, so they are
    // private by the class default.
    // NOTE(review): presumably `list` holds `len` used entries out of
    // `capacity` allocated ones — confirm against the implementation.
00274 int32_t len;
00275 int32_t capacity;
00276 UChar32* list;
    // A non-null bmpSet makes isFrozen() return true.
00277 BMPSet *bmpSet;
    // NOTE(review): presumably a scratch array exchanged with `list` by
    // swapBuffers(), sized by bufferCapacity — confirm.
00278 UChar32* buffer;
00279 int32_t bufferCapacity;
    // NOTE(review): presumably the length of `pat` — confirm.
00280 int32_t patLen;
00281
    // NOTE(review): presumably the cached pattern text managed by
    // setPattern()/releasePattern() — confirm.
00291 UChar *pat;
    // NOTE(review): presumably created on demand by allocateStrings() — confirm.
00292 UVector* strings;
    // A non-null stringSpan makes isFrozen() return true.
00293 UnicodeSetStringSpan *stringSpan;
00294
00295 private:
    /** Bit values stored in fFlags. */
00296 enum {
    // Tested by isBogus().
00297 kIsBogus = 1
00298 };
00299 uint8_t fFlags;
00300 public:
    /**
     * Reports whether the kIsBogus bit is set in fFlags
     * (defined inline after the class).
     */
00310 inline UBool isBogus(void) const;
00311
    /** NOTE(review): presumably puts the set into the state isBogus() reports — confirm. */
00328 void setToBogus();
00329
00330 public:
00331
    /**
     * Value limits: 0 and 0x10ffff (the lowest and highest Unicode code points).
     */
00332 enum {
00337 MIN_VALUE = 0,
00338
00343 MAX_VALUE = 0x10ffff
00344 };
00345
00346
00347
00348
00349
00350 public:
00351
    // ---- Construction, destruction, copying -------------------------------

    /** Default constructor. */
00356 UnicodeSet();
00357
    /** Constructs from a pair of code points (start, end). */
00366 UnicodeSet(UChar32 start, UChar32 end);
00367
    /** Constructs from a pattern string; takes a UErrorCode reference. */
00376 UnicodeSet(const UnicodeString& pattern,
00377 UErrorCode& status);
00378
    /** As above, with an options bit set and an optional symbol table. */
00391 UnicodeSet(const UnicodeString& pattern,
00392 uint32_t options,
00393 const SymbolTable* symbols,
00394 UErrorCode& status);
00395
    /** As above, with a ParsePosition passed by non-const reference. */
00409 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00410 uint32_t options,
00411 const SymbolTable* symbols,
00412 UErrorCode& status);
00413
    /** Copy constructor. */
00418 UnicodeSet(const UnicodeSet& o);
00419
    /** Virtual destructor. */
00424 virtual ~UnicodeSet();
00425
    /** Copy assignment. */
00431 UnicodeSet& operator=(const UnicodeSet& o);
00432
    /** Equality comparison; operator!= is defined as its negation. */
00444 virtual UBool operator==(const UnicodeSet& o) const;
00445
    /** Returns the negation of operator==(o) (defined inline after the class). */
00451 UBool operator!=(const UnicodeSet& o) const;
00452
    /** Returns a copy as a UnicodeFunctor pointer. NOTE(review): ownership presumably passes to the caller — confirm. */
00462 virtual UnicodeFunctor* clone() const;
00463
    /** Returns a hash code for this object. */
00471 virtual int32_t hashCode(void) const;
00472
00473
00474
00475
00476
    // ---- Freezing ---------------------------------------------------------

    /**
     * True when either bmpSet or stringSpan is non-null
     * (defined inline after the class).
     */
00485 inline UBool isFrozen() const;
00486
    /** NOTE(review): presumably puts the set into the state isFrozen() reports — confirm. */
00500 UnicodeFunctor *freeze();
00501
    /** NOTE(review): presumably returns a non-frozen copy — confirm. */
00510 UnicodeFunctor *cloneAsThawed() const;
00511
00512
00513
00514
00515
    // ---- Assignment from ranges, patterns and properties -------------------

    /** Sets this object from a (start, end) pair; returns a UnicodeSet reference. */
00526 UnicodeSet& set(UChar32 start, UChar32 end);
00527
    /** Static check on `pattern` at index `pos`. */
00533 static UBool resemblesPattern(const UnicodeString& pattern,
00534 int32_t pos);
00535
    /** Applies a pattern string; takes a UErrorCode reference. */
00548 UnicodeSet& applyPattern(const UnicodeString& pattern,
00549 UErrorCode& status);
00550
    /** As above, with an options bit set and an optional symbol table. */
00567 UnicodeSet& applyPattern(const UnicodeString& pattern,
00568 uint32_t options,
00569 const SymbolTable* symbols,
00570 UErrorCode& status);
00571
    /** As above, with a ParsePosition passed by non-const reference. */
00603 UnicodeSet& applyPattern(const UnicodeString& pattern,
00604 ParsePosition& pos,
00605 uint32_t options,
00606 const SymbolTable* symbols,
00607 UErrorCode& status);
00608
    /** Writes a pattern into `result` and returns it; escapeUnprintable defaults to FALSE. */
00622 virtual UnicodeString& toPattern(UnicodeString& result,
00623 UBool escapeUnprintable = FALSE) const;
00624
    /** Applies a (UProperty, integer value) pair; takes a UErrorCode reference. */
00647 UnicodeSet& applyIntPropertyValue(UProperty prop,
00648 int32_t value,
00649 UErrorCode& ec);
00650
    /** Applies a property given by name and value strings; takes a UErrorCode reference. */
00680 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00681 const UnicodeString& value,
00682 UErrorCode& ec);
00683
    // ---- Queries ----------------------------------------------------------

00692 virtual int32_t size(void) const;
00693
00700 virtual UBool isEmpty(void) const;
00701
    // contains(): overloads for one code point, a (start, end) pair, and a string.
00709 virtual UBool contains(UChar32 c) const;
00710
00719 virtual UBool contains(UChar32 start, UChar32 end) const;
00720
00728 UBool contains(const UnicodeString& s) const;
00729
    // containsAll(): overloads for another set and for a string.
00737 virtual UBool containsAll(const UnicodeSet& c) const;
00738
00746 UBool containsAll(const UnicodeString& s) const;
00747
    // containsNone(): overloads for a (start, end) pair, another set, and a string.
00756 UBool containsNone(UChar32 start, UChar32 end) const;
00757
00765 UBool containsNone(const UnicodeSet& c) const;
00766
00774 UBool containsNone(const UnicodeString& s) const;
00775
    // containsSome(): each overload returns the negation of the matching
    // containsNone() overload (defined inline after the class).
00784 inline UBool containsSome(UChar32 start, UChar32 end) const;
00785
00793 inline UBool containsSome(const UnicodeSet& s) const;
00794
00802 inline UBool containsSome(const UnicodeString& s) const;
00803
    // ---- Span functions ---------------------------------------------------
    // span/spanBack take UChar text; spanUTF8/spanBackUTF8 take char text.
    // All take a length and a USetSpanCondition and return an int32_t.

00822 int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00823
00841 int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00842
00861 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00862
00880 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00881
    /**
     * Matches against `text` between `offset` and `limit`. `offset` is a
     * non-const reference, and the function itself is non-const.
     * NOTE(review): presumably overrides a matcher interface reached via
     * UnicodeFilter — confirm.
     */
00886 virtual UMatchDegree matches(const Replaceable& text,
00887 int32_t& offset,
00888 int32_t limit,
00889 UBool incremental);
00890
00891 private:
    /** Private static helper taking text, a start/limit pair and a string. */
00913 static int32_t matchRest(const Replaceable& text,
00914 int32_t start, int32_t limit,
00915 const UnicodeString& s);
00916
    /** Private lookup helper for a code point; returns an int32_t. */
00926 int32_t findCodePoint(UChar32 c) const;
00927
00928 public:
00929
00937 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00938
    // indexOf() maps a code point to an int32_t; charAt() maps an int32_t
    // index to a code point.
00947 int32_t indexOf(UChar32 c) const;
00948
00958 UChar32 charAt(int32_t index) const;
00959
    // ---- Modification -----------------------------------------------------
    // The mutators below return UnicodeSet&. NOTE(review): presumably *this,
    // to allow chaining — confirm.

    // add(): overloads for a (start, end) pair, one code point, and a string.
00974 virtual UnicodeSet& add(UChar32 start, UChar32 end);
00975
00983 UnicodeSet& add(UChar32 c);
00984
00996 UnicodeSet& add(const UnicodeString& s);
00997
00998 private:
    /** Private static helper mapping a string to an int32_t. */
01004 static int32_t getSingleCP(const UnicodeString& s);
01005
    /** Private helper behind the string add path. NOTE(review): exact contract not visible here. */
01006 void _add(const UnicodeString& s);
01007
01008 public:
    // String-argument forms of addAll / retainAll / complementAll / removeAll.
01017 UnicodeSet& addAll(const UnicodeString& s);
01018
01027 UnicodeSet& retainAll(const UnicodeString& s);
01028
01037 UnicodeSet& complementAll(const UnicodeString& s);
01038
01047 UnicodeSet& removeAll(const UnicodeString& s);
01048
    // Static factories returning a UnicodeSet pointer.
    // NOTE(review): ownership presumably passes to the caller — confirm.
01057 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01058
01059
01067 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01068
    // retain(): overloads for a (start, end) pair and one code point.
01082 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01083
01084
01090 UnicodeSet& retain(UChar32 c);
01091
    // remove(): overloads for a (start, end) pair, one code point, and a string.
01105 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01106
01114 UnicodeSet& remove(UChar32 c);
01115
01125 UnicodeSet& remove(const UnicodeString& s);
01126
    // complement(): overloads for no argument, a (start, end) pair, one code
    // point, and a string.
01134 virtual UnicodeSet& complement(void);
01135
01150 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01151
01159 UnicodeSet& complement(UChar32 c);
01160
01171 UnicodeSet& complement(const UnicodeString& s);
01172
    // Set-argument forms of addAll / retainAll / removeAll / complementAll.
01185 virtual UnicodeSet& addAll(const UnicodeSet& c);
01186
01198 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01199
01211 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01212
01223 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01224
01231 virtual UnicodeSet& clear(void);
01232
    /** Takes an int32_t `attribute`. NOTE(review): accepted values are not visible in this header. */
01258 UnicodeSet& closeOver(int32_t attribute);
01259
01266 virtual UnicodeSet &removeAllStrings();
01267
    // ---- Range access -----------------------------------------------------
    // getRangeStart()/getRangeEnd() take an int32_t index.
    // NOTE(review): presumably valid for 0 <= index < getRangeCount() — confirm.

01275 virtual int32_t getRangeCount(void) const;
01276
01284 virtual UChar32 getRangeStart(int32_t index) const;
01285
01293 virtual UChar32 getRangeEnd(int32_t index) const;
01294
    /** Writes into a caller-supplied uint16_t array of destCapacity entries; returns an int32_t. */
01343 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01344
01351 virtual UnicodeSet& compact();
01352
    // Class ID functions: one static, one virtual.
01364 static UClassID U_EXPORT2 getStaticClassID(void);
01365
01374 virtual UClassID getDynamicClassID(void) const;
01375
01376 private:
01377
01378
01379
    // USetAccess is a friend and can therefore use the private string
    // accessors declared next.
01380 friend class USetAccess;
01381
01382 int32_t getStringCount() const;
01383
01384 const UnicodeString* getString(int32_t index) const;
01385
01386
01387
01388
01389
01390 private:
01391
    /** Private virtual taking a uint8_t. NOTE(review): presumably overrides a base-class hook — confirm. */
01397 virtual UBool matchesIndexValue(uint8_t v) const;
01398
01399 private:
01400
01401
01402
01403
01404
    /**
     * Private constructor taking another set plus an unnamed UBool.
     * NOTE(review): the unnamed parameter looks like an overload tag,
     * possibly used by cloneAsThawed() — confirm.
     */
01405 UnicodeSet(const UnicodeSet& o, UBool );
01406
01407
01408
01409
01410
    /** Private pattern-application overload driven by a RuleCharacterIterator; `rebuiltPat` is a non-const reference. */
01411 void applyPattern(RuleCharacterIterator& chars,
01412 const SymbolTable* symbols,
01413 UnicodeString& rebuiltPat,
01414 uint32_t options,
01415 UErrorCode& ec);
01416
01417
01418
01419
01420
    // Storage helpers. NOTE(review): presumably ensureCapacity() grows `list`
    // and ensureBufferCapacity() grows `buffer` — confirm.
01421 void ensureCapacity(int32_t newLen, UErrorCode& ec);
01422
01423 void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01424
01425 void swapBuffers(void);
01426
01427 UBool allocateStrings(UErrorCode &status);
01428
    // Private pattern-generation helpers; each appends to or fills a
    // caller-supplied UnicodeString.
01429 UnicodeString& _toPattern(UnicodeString& result,
01430 UBool escapeUnprintable) const;
01431
01432 UnicodeString& _generatePattern(UnicodeString& result,
01433 UBool escapeUnprintable) const;
01434
01435 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01436
01437 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01438
01439
01440
01441
01442
    // Private array-level operations on a UChar32 array of otherLen entries.
    // NOTE(review): meaning of `polarity` is not visible in this header.
01443 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01444
01445 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01446
01447 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01448
    // Private property-pattern helpers: string-based and iterator-based forms.
01454 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01455 int32_t pos);
01456
01457 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01458 int32_t iterOpts);
01459
01498 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01499 ParsePosition& ppos,
01500 UErrorCode &ec);
01501
01502 void applyPropertyPattern(RuleCharacterIterator& chars,
01503 UnicodeString& rebuiltPat,
01504 UErrorCode& ec);
01505
    /** Static accessor returning a const UnicodeSet pointer for an int32_t `src`. */
01506 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01507
    /** Callback type: receives a code point and an opaque context pointer, returns UBool. */
01512 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01513
    /** Takes a Filter callback and the context pointer to hand to it. */
01523 void applyFilter(Filter filter,
01524 void* context,
01525 int32_t src,
01526 UErrorCode &status);
01527
    // NOTE(review): presumably these manage pat/patLen — confirm.
01531 void setPattern(const UnicodeString& newPat);
01535 void releasePattern();
01536
    // UnicodeSetIterator is a friend and can therefore read the private members.
01537 friend class UnicodeSetIterator;
01538 };
01539
/**
 * Inequality test.
 * @param o the set to compare against
 * @return the logical negation of operator==(o)
 */
01540 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01541 return !(this->operator==(o));
01542 }
01543
/**
 * Reports the frozen state.
 * @return false only when both bmpSet and stringSpan are NULL; true otherwise
 */
01544 inline UBool UnicodeSet::isFrozen() const {
01545 return (UBool)!(bmpSet==NULL && stringSpan==NULL);
01546 }
01547
/**
 * Range form of containsSome().
 * @param start first argument forwarded to containsNone()
 * @param end second argument forwarded to containsNone()
 * @return true exactly when containsNone(start, end) returns zero
 */
01548 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01549 return (UBool)(containsNone(start, end) == 0);
01550 }
01551
/**
 * Set form of containsSome().
 * @param s the set forwarded to containsNone()
 * @return 0 when containsNone(s) is non-zero, 1 otherwise
 */
01552 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01553 return containsNone(s) ? (UBool)0 : (UBool)1;
01554 }
01555
/**
 * String form of containsSome().
 * @param s the string forwarded to containsNone()
 * @return the logical negation of containsNone(s)
 */
01556 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01557 return !(this->containsNone(s));
01558 }
01559
/**
 * Reports the bogus state.
 * @return the kIsBogus bit of fFlags, converted to UBool
 */
01560 inline UBool UnicodeSet::isBogus() const {
01561 return (UBool)(kIsBogus & fFlags);
01562 }
01563
01564 U_NAMESPACE_END
01565
01566 #endif