ICU 4.4.2 4.4.2
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1998-2010, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File unistr.h 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 09/25/98 stephen Creation. 00013 * 11/11/98 stephen Changed per 11/9 code review. 00014 * 04/20/99 stephen Overhauled per 4/16 code review. 00015 * 11/18/99 aliu Made to inherit from Replaceable. Added method 00016 * handleReplaceBetween(); other methods unchanged. 00017 * 06/25/01 grhoten Remove dependency on iostream. 00018 ****************************************************************************** 00019 */ 00020 00021 #ifndef UNISTR_H 00022 #define UNISTR_H 00023 00029 #include "unicode/utypes.h" 00030 #include "unicode/rep.h" 00031 #include "unicode/std_string.h" 00032 #include "unicode/stringpiece.h" 00033 #include "unicode/bytestream.h" 00034 00035 struct UConverter; // unicode/ucnv.h 00036 class StringThreadTest; 00037 00038 #ifndef U_COMPARE_CODE_POINT_ORDER 00039 /* see also ustring.h and unorm.h */ 00045 #define U_COMPARE_CODE_POINT_ORDER 0x8000 00046 #endif 00047 00048 #ifndef USTRING_H 00049 00052 U_STABLE int32_t U_EXPORT2 00053 u_strlen(const UChar *s); 00054 #endif 00055 00056 U_NAMESPACE_BEGIN 00057 00058 class Locale; // unicode/locid.h 00059 class StringCharacterIterator; 00060 class BreakIterator; // unicode/brkiter.h 00061 00062 /* The <iostream> include has been moved to unicode/ustream.h */ 00063 00074 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant 00075 00093 #if defined(U_DECLARE_UTF16) 00094 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 00095 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 00096 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length) 00097 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 00098 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length) 00099 #else 00100 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV) 00101 #endif 00102 00116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 00117 00187 class U_COMMON_API UnicodeString : public Replaceable 00188 { 00189 public: 00190 00199 enum EInvariant { 00204 kInvariant 00205 }; 00206 00207 //======================================== 00208 // Read-only operations 00209 //======================================== 00210 00211 /* Comparison - bitwise only - for international comparison use collation */ 00212 00220 inline UBool operator== (const UnicodeString& text) const; 00221 00229 inline UBool operator!= (const UnicodeString& text) const; 00230 00238 inline UBool operator> (const UnicodeString& text) const; 00239 00247 inline UBool operator< (const UnicodeString& text) const; 00248 00256 inline UBool operator>= (const UnicodeString& text) const; 00257 00265 inline UBool operator<= (const UnicodeString& text) const; 00266 00278 inline int8_t compare(const UnicodeString& text) const; 00279 00294 inline int8_t compare(int32_t start, 00295 int32_t length, 00296 const UnicodeString& text) const; 00297 00315 inline int8_t compare(int32_t start, 00316 int32_t length, 00317 const UnicodeString& srcText, 00318 int32_t srcStart, 00319 int32_t srcLength) const; 00320 00333 inline int8_t compare(const UChar *srcChars, 00334 int32_t srcLength) const; 00335 00350 inline int8_t compare(int32_t start, 00351 int32_t length, 00352 const UChar *srcChars) const; 00353 00371 inline int8_t compare(int32_t start, 00372 int32_t length, 00373 const UChar *srcChars, 00374 int32_t srcStart, 00375 int32_t srcLength) const; 00376 00394 inline int8_t compareBetween(int32_t start, 00395 int32_t limit, 00396 const UnicodeString& srcText, 00397 int32_t srcStart, 00398 int32_t srcLimit) const; 00399 00417 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 00418 00438 inline int8_t compareCodePointOrder(int32_t start, 00439 int32_t length, 00440 const UnicodeString& srcText) const; 00441 00463 inline int8_t compareCodePointOrder(int32_t start, 00464 int32_t length, 00465 const UnicodeString& srcText, 00466 int32_t srcStart, 00467 int32_t srcLength) const; 00468 00487 inline int8_t compareCodePointOrder(const UChar *srcChars, 00488 int32_t srcLength) const; 00489 00509 inline int8_t compareCodePointOrder(int32_t start, 00510 int32_t length, 00511 const UChar *srcChars) const; 00512 00534 inline int8_t compareCodePointOrder(int32_t start, 00535 int32_t length, 00536 const UChar *srcChars, 00537 int32_t srcStart, 00538 int32_t srcLength) const; 00539 00561 inline int8_t compareCodePointOrderBetween(int32_t start, 00562 int32_t limit, 00563 const UnicodeString& srcText, 00564 int32_t srcStart, 00565 int32_t srcLimit) const; 00566 00585 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 00586 00607 inline int8_t caseCompare(int32_t start, 00608 int32_t length, 00609 const UnicodeString& srcText, 00610 uint32_t options) const; 00611 00634 inline int8_t caseCompare(int32_t start, 00635 int32_t length, 00636 const UnicodeString& srcText, 00637 int32_t srcStart, 00638 int32_t srcLength, 00639 uint32_t options) const; 00640 00660 inline int8_t caseCompare(const UChar *srcChars, 00661 int32_t srcLength, 00662 uint32_t options) const; 00663 00684 inline int8_t caseCompare(int32_t start, 00685 int32_t length, 00686 const UChar *srcChars, 00687 uint32_t options) const; 00688 00711 inline int8_t caseCompare(int32_t start, 00712 int32_t length, 00713 const UChar *srcChars, 00714 int32_t srcStart, 00715 int32_t srcLength, 00716 uint32_t options) const; 00717 00740 inline int8_t caseCompareBetween(int32_t start, 00741 int32_t limit, 00742 const UnicodeString& srcText, 00743 int32_t srcStart, 00744 int32_t srcLimit, 00745 uint32_t options) const; 00746 00754 inline UBool startsWith(const UnicodeString& text) const; 00755 00766 inline UBool startsWith(const UnicodeString& srcText, 00767 int32_t srcStart, 00768 int32_t srcLength) const; 00769 00778 inline UBool startsWith(const UChar *srcChars, 00779 int32_t srcLength) const; 00780 00790 inline UBool startsWith(const UChar *srcChars, 00791 int32_t srcStart, 00792 int32_t srcLength) const; 00793 00801 inline UBool endsWith(const UnicodeString& text) const; 00802 00813 inline UBool endsWith(const UnicodeString& srcText, 00814 int32_t srcStart, 00815 int32_t srcLength) const; 00816 00825 inline UBool endsWith(const UChar *srcChars, 00826 int32_t srcLength) const; 00827 00838 inline UBool endsWith(const UChar *srcChars, 00839 int32_t srcStart, 00840 int32_t srcLength) const; 00841 00842 00843 /* Searching - bitwise only */ 00844 00853 inline int32_t indexOf(const UnicodeString& text) const; 00854 00864 inline int32_t indexOf(const UnicodeString& text, 00865 int32_t start) const; 00866 00878 inline int32_t indexOf(const UnicodeString& text, 00879 int32_t start, 00880 int32_t length) const; 00881 00898 inline int32_t indexOf(const UnicodeString& srcText, 00899 int32_t srcStart, 00900 int32_t srcLength, 00901 int32_t start, 00902 int32_t length) const; 00903 00915 inline int32_t indexOf(const UChar *srcChars, 00916 int32_t srcLength, 00917 int32_t start) const; 00918 00931 inline int32_t indexOf(const UChar *srcChars, 00932 int32_t srcLength, 00933 int32_t start, 00934 int32_t length) const; 00935 00952 int32_t indexOf(const UChar *srcChars, 00953 int32_t srcStart, 00954 int32_t srcLength, 00955 int32_t start, 00956 int32_t length) const; 00957 00965 inline int32_t indexOf(UChar c) const; 00966 00975 inline int32_t indexOf(UChar32 c) const; 00976 00985 inline int32_t indexOf(UChar c, 00986 int32_t start) const; 00987 00997 inline int32_t indexOf(UChar32 c, 00998 int32_t start) const; 00999 01010 inline int32_t indexOf(UChar c, 01011 int32_t start, 01012 int32_t length) const; 01013 01025 inline int32_t indexOf(UChar32 c, 01026 int32_t start, 01027 int32_t length) const; 01028 01037 inline int32_t lastIndexOf(const UnicodeString& text) const; 01038 01048 inline int32_t lastIndexOf(const UnicodeString& text, 01049 int32_t start) const; 01050 01062 inline int32_t lastIndexOf(const UnicodeString& text, 01063 int32_t start, 01064 int32_t length) const; 01065 01082 inline int32_t lastIndexOf(const UnicodeString& srcText, 01083 int32_t srcStart, 01084 int32_t srcLength, 01085 int32_t start, 01086 int32_t length) const; 01087 01098 inline int32_t lastIndexOf(const UChar *srcChars, 01099 int32_t srcLength, 01100 int32_t start) const; 01101 01114 inline int32_t lastIndexOf(const UChar *srcChars, 01115 int32_t srcLength, 01116 int32_t start, 01117 int32_t length) const; 01118 01135 int32_t lastIndexOf(const UChar *srcChars, 01136 int32_t srcStart, 01137 int32_t srcLength, 01138 int32_t start, 01139 int32_t length) const; 01140 01148 inline int32_t lastIndexOf(UChar c) const; 01149 01158 inline int32_t lastIndexOf(UChar32 c) const; 01159 01168 inline int32_t lastIndexOf(UChar c, 01169 int32_t start) const; 01170 01180 inline int32_t lastIndexOf(UChar32 c, 01181 int32_t start) const; 01182 01193 inline int32_t lastIndexOf(UChar c, 01194 int32_t start, 01195 int32_t length) const; 01196 01208 inline int32_t lastIndexOf(UChar32 c, 01209 int32_t start, 01210 int32_t length) const; 01211 01212 01213 /* Character access */ 01214 01223 inline UChar charAt(int32_t offset) const; 01224 01232 inline UChar operator[] (int32_t offset) const; 01233 01245 inline UChar32 char32At(int32_t offset) const; 01246 01262 inline int32_t getChar32Start(int32_t offset) const; 01263 01280 inline int32_t getChar32Limit(int32_t offset) const; 01281 01332 int32_t moveIndex32(int32_t index, int32_t delta) const; 01333 01334 /* Substring extraction */ 01335 01351 inline void extract(int32_t start, 01352 int32_t length, 01353 UChar *dst, 01354 int32_t dstStart = 0) const; 01355 01377 int32_t 01378 extract(UChar *dest, int32_t destCapacity, 01379 UErrorCode &errorCode) const; 01380 01391 inline void extract(int32_t start, 01392 int32_t length, 01393 UnicodeString& target) const; 01394 01406 inline void extractBetween(int32_t start, 01407 int32_t limit, 01408 UChar *dst, 01409 int32_t dstStart = 0) const; 01410 01420 virtual void extractBetween(int32_t start, 01421 int32_t limit, 01422 UnicodeString& target) const; 01423 01445 int32_t extract(int32_t start, 01446 int32_t startLength, 01447 char *target, 01448 int32_t targetCapacity, 01449 enum EInvariant inv) const; 01450 01451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 01452 01472 int32_t extract(int32_t start, 01473 int32_t startLength, 01474 char *target, 01475 uint32_t targetLength) const; 01476 01477 #endif 01478 01479 #if !UCONFIG_NO_CONVERSION 01480 01506 inline int32_t extract(int32_t start, 01507 int32_t startLength, 01508 char *target, 01509 const char *codepage = 0) const; 01510 01540 int32_t extract(int32_t start, 01541 int32_t startLength, 01542 char *target, 01543 uint32_t targetLength, 01544 const char *codepage) const; 01545 01563 int32_t extract(char *dest, int32_t destCapacity, 01564 UConverter *cnv, 01565 UErrorCode &errorCode) const; 01566 01567 #endif 01568 01582 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 01583 01594 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 01595 01606 void toUTF8(ByteSink &sink) const; 01607 01608 #if U_HAVE_STD_STRING 01609 01622 template<typename StringClass> 01623 StringClass &toUTF8String(StringClass &result) const { 01624 StringByteSink<StringClass> sbs(&result); 01625 toUTF8(sbs); 01626 return result; 01627 } 01628 01629 #endif 01630 01646 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 01647 01648 /* Length operations */ 01649 01658 inline int32_t length(void) const; 01659 01673 int32_t 01674 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 01675 01699 UBool 01700 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 01701 01707 inline UBool isEmpty(void) const; 01708 01718 inline int32_t getCapacity(void) const; 01719 01720 /* Other operations */ 01721 01727 inline int32_t hashCode(void) const; 01728 01741 inline UBool isBogus(void) const; 01742 01743 01744 //======================================== 01745 // Write operations 01746 //======================================== 01747 01748 /* Assignment operations */ 01749 01757 UnicodeString &operator=(const UnicodeString &srcText); 01758 01779 UnicodeString &fastCopyFrom(const UnicodeString &src); 01780 01788 inline UnicodeString& operator= (UChar ch); 01789 01797 inline UnicodeString& operator= (UChar32 ch); 01798 01810 inline UnicodeString& setTo(const UnicodeString& srcText, 01811 int32_t srcStart); 01812 01826 inline UnicodeString& setTo(const UnicodeString& srcText, 01827 int32_t srcStart, 01828 int32_t srcLength); 01829 01838 inline UnicodeString& setTo(const UnicodeString& srcText); 01839 01848 inline UnicodeString& setTo(const UChar *srcChars, 01849 int32_t srcLength); 01850 01859 UnicodeString& setTo(UChar srcChar); 01860 01869 UnicodeString& setTo(UChar32 srcChar); 01870 01891 UnicodeString &setTo(UBool isTerminated, 01892 const UChar *text, 01893 int32_t textLength); 01894 01914 UnicodeString &setTo(UChar *buffer, 01915 int32_t buffLength, 01916 int32_t buffCapacity); 01917 01958 void setToBogus(); 01959 01967 UnicodeString& setCharAt(int32_t offset, 01968 UChar ch); 01969 01970 01971 /* Append operations */ 01972 01980 inline UnicodeString& operator+= (UChar ch); 01981 01989 inline UnicodeString& operator+= (UChar32 ch); 01990 01999 inline UnicodeString& operator+= (const UnicodeString& srcText); 02000 02015 inline UnicodeString& append(const UnicodeString& srcText, 02016 int32_t srcStart, 02017 int32_t srcLength); 02018 02026 inline UnicodeString& append(const UnicodeString& srcText); 02027 02041 inline UnicodeString& append(const UChar *srcChars, 02042 int32_t srcStart, 02043 int32_t srcLength); 02044 02053 inline UnicodeString& append(const UChar *srcChars, 02054 int32_t srcLength); 02055 02062 inline UnicodeString& append(UChar srcChar); 02063 02070 inline UnicodeString& append(UChar32 srcChar); 02071 02072 02073 /* Insert operations */ 02074 02088 inline UnicodeString& insert(int32_t start, 02089 const UnicodeString& srcText, 02090 int32_t srcStart, 02091 int32_t srcLength); 02092 02101 inline UnicodeString& insert(int32_t start, 02102 const UnicodeString& srcText); 02103 02117 inline UnicodeString& insert(int32_t start, 02118 const UChar *srcChars, 02119 int32_t srcStart, 02120 int32_t srcLength); 02121 02131 inline UnicodeString& insert(int32_t start, 02132 const UChar *srcChars, 02133 int32_t srcLength); 02134 02143 inline UnicodeString& insert(int32_t start, 02144 UChar srcChar); 02145 02154 inline UnicodeString& insert(int32_t start, 02155 UChar32 srcChar); 02156 02157 02158 /* Replace operations */ 02159 02177 UnicodeString& replace(int32_t start, 02178 int32_t length, 02179 const UnicodeString& srcText, 02180 int32_t srcStart, 02181 int32_t srcLength); 02182 02195 UnicodeString& replace(int32_t start, 02196 int32_t length, 02197 const UnicodeString& srcText); 02198 02216 UnicodeString& replace(int32_t start, 02217 int32_t length, 02218 const UChar *srcChars, 02219 int32_t srcStart, 02220 int32_t srcLength); 02221 02234 inline UnicodeString& replace(int32_t start, 02235 int32_t length, 02236 const UChar *srcChars, 02237 int32_t srcLength); 02238 02250 inline UnicodeString& replace(int32_t start, 02251 int32_t length, 02252 UChar srcChar); 02253 02265 inline UnicodeString& replace(int32_t start, 02266 int32_t length, 02267 UChar32 srcChar); 02268 02278 inline UnicodeString& replaceBetween(int32_t start, 02279 int32_t limit, 02280 const UnicodeString& srcText); 02281 02296 inline UnicodeString& replaceBetween(int32_t start, 02297 int32_t limit, 02298 const UnicodeString& srcText, 02299 int32_t srcStart, 02300 int32_t srcLimit); 02301 02312 virtual void handleReplaceBetween(int32_t start, 02313 int32_t limit, 02314 const UnicodeString& text); 02315 02321 virtual UBool hasMetaData() const; 02322 02338 virtual void copy(int32_t start, int32_t limit, int32_t dest); 02339 02340 /* Search and replace operations */ 02341 02350 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 02351 const UnicodeString& newText); 02352 02364 inline UnicodeString& findAndReplace(int32_t start, 02365 int32_t length, 02366 const UnicodeString& oldText, 02367 const UnicodeString& newText); 02368 02386 UnicodeString& findAndReplace(int32_t start, 02387 int32_t length, 02388 const UnicodeString& oldText, 02389 int32_t oldStart, 02390 int32_t oldLength, 02391 const UnicodeString& newText, 02392 int32_t newStart, 02393 int32_t newLength); 02394 02395 02396 /* Remove operations */ 02397 02403 inline UnicodeString& remove(void); 02404 02413 inline UnicodeString& remove(int32_t start, 02414 int32_t length = (int32_t)INT32_MAX); 02415 02424 inline UnicodeString& removeBetween(int32_t start, 02425 int32_t limit = (int32_t)INT32_MAX); 02426 02436 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 02437 02438 /* Length operations */ 02439 02451 UBool padLeading(int32_t targetLength, 02452 UChar padChar = 0x0020); 02453 02465 UBool padTrailing(int32_t targetLength, 02466 UChar padChar = 0x0020); 02467 02474 inline UBool truncate(int32_t targetLength); 02475 02481 UnicodeString& trim(void); 02482 02483 02484 /* Miscellaneous operations */ 02485 02491 inline UnicodeString& reverse(void); 02492 02501 inline UnicodeString& reverse(int32_t start, 02502 int32_t length); 02503 02510 UnicodeString& toUpper(void); 02511 02519 UnicodeString& toUpper(const Locale& locale); 02520 02527 UnicodeString& toLower(void); 02528 02536 UnicodeString& toLower(const Locale& locale); 02537 02538 #if !UCONFIG_NO_BREAK_ITERATION 02539 02566 UnicodeString &toTitle(BreakIterator *titleIter); 02567 02595 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 02596 02628 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 02629 02630 #endif 02631 02643 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 02644 02645 //======================================== 02646 // Access to the internal buffer 02647 //======================================== 02648 02692 UChar *getBuffer(int32_t minCapacity); 02693 02714 void releaseBuffer(int32_t newLength=-1); 02715 02746 inline const UChar *getBuffer() const; 02747 02781 inline const UChar *getTerminatedBuffer(); 02782 02783 //======================================== 02784 // Constructors 02785 //======================================== 02786 02790 UnicodeString(); 02791 02803 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 02804 02810 UnicodeString(UChar ch); 02811 02817 UnicodeString(UChar32 ch); 02818 02825 UnicodeString(const UChar *text); 02826 02834 UnicodeString(const UChar *text, 02835 int32_t textLength); 02836 02856 UnicodeString(UBool isTerminated, 02857 const UChar *text, 02858 int32_t textLength); 02859 02878 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 02879 02880 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 02881 02888 UnicodeString(const char *codepageData); 02889 02896 UnicodeString(const char *codepageData, int32_t dataLength); 02897 02898 #endif 02899 02900 #if !UCONFIG_NO_CONVERSION 02901 02919 UnicodeString(const char *codepageData, const char *codepage); 02920 02938 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 02939 02961 UnicodeString( 02962 const char *src, int32_t srcLength, 02963 UConverter *cnv, 02964 UErrorCode &errorCode); 02965 02966 #endif 02967 02992 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 02993 02994 03000 UnicodeString(const UnicodeString& that); 03001 03008 UnicodeString(const UnicodeString& src, int32_t srcStart); 03009 03017 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 03018 03035 virtual Replaceable *clone() const; 03036 03040 virtual ~UnicodeString(); 03041 03055 static UnicodeString fromUTF8(const StringPiece &utf8); 03056 03068 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 03069 03070 /* Miscellaneous operations */ 03071 03106 UnicodeString unescape() const; 03107 03127 UChar32 unescapeAt(int32_t &offset) const; 03128 03134 static UClassID U_EXPORT2 getStaticClassID(); 03135 03141 virtual UClassID getDynamicClassID() const; 03142 03143 //======================================== 03144 // Implementation methods 03145 //======================================== 03146 03147 protected: 03152 virtual int32_t getLength() const; 03153 03159 virtual UChar getCharAt(int32_t offset) const; 03160 03166 virtual UChar32 getChar32At(int32_t offset) const; 03167 03168 private: 03169 // For char* constructors. Could be made public. 03170 UnicodeString &setToUTF8(const StringPiece &utf8); 03171 // For extract(char*). 03172 // We could make a toUTF8(target, capacity, errorCode) public but not 03173 // this version: New API will be cleaner if we make callers create substrings 03174 // rather than having start+length on every method, 03175 // and it should take a UErrorCode&. 03176 int32_t 03177 toUTF8(int32_t start, int32_t len, 03178 char *target, int32_t capacity) const; 03179 03180 03181 inline int8_t 03182 doCompare(int32_t start, 03183 int32_t length, 03184 const UnicodeString& srcText, 03185 int32_t srcStart, 03186 int32_t srcLength) const; 03187 03188 int8_t doCompare(int32_t start, 03189 int32_t length, 03190 const UChar *srcChars, 03191 int32_t srcStart, 03192 int32_t srcLength) const; 03193 03194 inline int8_t 03195 doCompareCodePointOrder(int32_t start, 03196 int32_t length, 03197 const UnicodeString& srcText, 03198 int32_t srcStart, 03199 int32_t srcLength) const; 03200 03201 int8_t doCompareCodePointOrder(int32_t start, 03202 int32_t length, 03203 const UChar *srcChars, 03204 int32_t srcStart, 03205 int32_t srcLength) const; 03206 03207 inline int8_t 03208 doCaseCompare(int32_t start, 03209 int32_t length, 03210 const UnicodeString &srcText, 03211 int32_t srcStart, 03212 int32_t srcLength, 03213 uint32_t options) const; 03214 03215 int8_t 03216 doCaseCompare(int32_t start, 03217 int32_t length, 03218 const UChar *srcChars, 03219 int32_t srcStart, 03220 int32_t srcLength, 03221 uint32_t options) const; 03222 03223 int32_t doIndexOf(UChar c, 03224 int32_t start, 03225 int32_t length) const; 03226 03227 int32_t doIndexOf(UChar32 c, 03228 int32_t start, 03229 int32_t length) const; 03230 03231 int32_t doLastIndexOf(UChar c, 03232 int32_t start, 03233 int32_t length) const; 03234 03235 int32_t doLastIndexOf(UChar32 c, 03236 int32_t start, 03237 int32_t length) const; 03238 03239 void doExtract(int32_t start, 03240 int32_t length, 03241 UChar *dst, 03242 int32_t dstStart) const; 03243 03244 inline void doExtract(int32_t start, 03245 int32_t length, 03246 UnicodeString& target) const; 03247 03248 inline UChar doCharAt(int32_t offset) const; 03249 03250 UnicodeString& doReplace(int32_t start, 03251 int32_t length, 03252 const UnicodeString& srcText, 03253 int32_t srcStart, 03254 int32_t srcLength); 03255 03256 UnicodeString& doReplace(int32_t start, 03257 int32_t length, 03258 const UChar *srcChars, 03259 int32_t srcStart, 03260 int32_t srcLength); 03261 03262 UnicodeString& doReverse(int32_t start, 03263 int32_t length); 03264 03265 // calculate hash code 03266 int32_t doHashCode(void) const; 03267 03268 // get pointer to start of array 03269 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 03270 inline UChar* getArrayStart(void); 03271 inline const UChar* getArrayStart(void) const; 03272 03273 // A UnicodeString object (not necessarily its current buffer) 03274 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 03275 inline UBool isWritable() const; 03276 03277 // Is the current buffer writable? 03278 inline UBool isBufferWritable() const; 03279 03280 // None of the following does releaseArray(). 03281 inline void setLength(int32_t len); // sets only fShortLength and fLength 03282 inline void setToEmpty(); // sets fFlags=kShortString 03283 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 03284 03285 // allocate the array; result may be fStackBuffer 03286 // sets refCount to 1 if appropriate 03287 // sets fArray, fCapacity, and fFlags 03288 // returns boolean for success or failure 03289 UBool allocate(int32_t capacity); 03290 03291 // release the array if owned 03292 void releaseArray(void); 03293 03294 // turn a bogus string into an empty one 03295 void unBogus(); 03296 03297 // implements assigment operator, copy constructor, and fastCopyFrom() 03298 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); 03299 03300 // Pin start and limit to acceptable values. 03301 inline void pinIndex(int32_t& start) const; 03302 inline void pinIndices(int32_t& start, 03303 int32_t& length) const; 03304 03305 #if !UCONFIG_NO_CONVERSION 03306 03307 /* Internal extract() using UConverter. */ 03308 int32_t doExtract(int32_t start, int32_t length, 03309 char *dest, int32_t destCapacity, 03310 UConverter *cnv, 03311 UErrorCode &errorCode) const; 03312 03313 /* 03314 * Real constructor for converting from codepage data. 03315 * It assumes that it is called with !fRefCounted. 03316 * 03317 * If <code>codepage==0</code>, then the default converter 03318 * is used for the platform encoding. 03319 * If <code>codepage</code> is an empty string (<code>""</code>), 03320 * then a simple conversion is performed on the codepage-invariant 03321 * subset ("invariant characters") of the platform encoding. See utypes.h. 03322 */ 03323 void doCodepageCreate(const char *codepageData, 03324 int32_t dataLength, 03325 const char *codepage); 03326 03327 /* 03328 * Worker function for creating a UnicodeString from 03329 * a codepage string using a UConverter. 03330 */ 03331 void 03332 doCodepageCreate(const char *codepageData, 03333 int32_t dataLength, 03334 UConverter *converter, 03335 UErrorCode &status); 03336 03337 #endif 03338 03339 /* 03340 * This function is called when write access to the array 03341 * is necessary. 03342 * 03343 * We need to make a copy of the array if 03344 * the buffer is read-only, or 03345 * the buffer is refCounted (shared), and refCount>1, or 03346 * the buffer is too small. 03347 * 03348 * Return FALSE if memory could not be allocated. 03349 */ 03350 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 03351 int32_t growCapacity = -1, 03352 UBool doCopyArray = TRUE, 03353 int32_t **pBufferToDelete = 0, 03354 UBool forceClone = FALSE); 03355 03356 // common function for case mappings 03357 UnicodeString & 03358 caseMap(BreakIterator *titleIter, 03359 const char *locale, 03360 uint32_t options, 03361 int32_t toWhichCase); 03362 03363 // ref counting 03364 void addRef(void); 03365 int32_t removeRef(void); 03366 int32_t refCount(void) const; 03367 03368 // constants 03369 enum { 03370 // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer): 03371 // 32-bit pointers: 4+1+1+13*2 = 32 bytes 03372 // 64-bit pointers: 8+1+1+15*2 = 40 bytes 03373 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings 03374 kInvalidUChar=0xffff, // invalid UChar index 03375 kGrowSize=128, // grow size for this buffer 03376 kInvalidHashCode=0, // invalid hash code 03377 kEmptyHashCode=1, // hash code for empty string 03378 03379 // bit flag values for fFlags 03380 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 03381 kUsingStackBuffer=2,// fArray==fStackBuffer 03382 kRefCounted=4, // there is a refCount field before the characters in fArray 03383 kBufferIsReadonly=8,// do not write to this buffer 03384 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 03385 // and releaseBuffer(newLength) must be called 03386 03387 // combined values for convenience 03388 kShortString=kUsingStackBuffer, 03389 kLongString=kRefCounted, 03390 kReadonlyAlias=kBufferIsReadonly, 03391 kWritableAlias=0 03392 }; 03393 03394 friend class StringThreadTest; 03395 03396 union StackBufferOrFields; // forward declaration necessary before friend declaration 03397 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 03398 03399 /* 03400 * The following are all the class fields that are stored 03401 * in each UnicodeString object. 03402 * Note that UnicodeString has virtual functions, 03403 * therefore there is an implicit vtable pointer 03404 * as the first real field. 03405 * The fields should be aligned such that no padding is 03406 * necessary, mostly by having larger types first. 03407 * On 32-bit machines, the size should be 32 bytes, 03408 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 03409 */ 03410 // (implicit) *vtable; 03411 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 03412 uint8_t fFlags; // bit flags: see constants above 03413 union StackBufferOrFields { 03414 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 03415 // else fFields is used 03416 UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings 03417 struct { 03418 uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b) 03419 int32_t fLength; // number of characters in fArray if >127; else undefined 03420 UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b)) 03421 int32_t fCapacity; // sizeof fArray 03422 } fFields; 03423 } fUnion; 03424 }; 03425 03434 U_COMMON_API UnicodeString U_EXPORT2 03435 operator+ (const UnicodeString &s1, const UnicodeString &s2); 03436 03437 //======================================== 03438 // Inline members 03439 //======================================== 03440 03441 //======================================== 03442 // Privates 03443 //======================================== 03444 03445 inline void 03446 UnicodeString::pinIndex(int32_t& start) const 03447 { 03448 // pin index 03449 if(start < 0) { 03450 start = 0; 03451 } else if(start > length()) { 03452 start = length(); 03453 } 03454 } 03455 03456 inline void 03457 UnicodeString::pinIndices(int32_t& start, 03458 int32_t& _length) const 03459 { 03460 // pin indices 03461 int32_t len = length(); 03462 if(start < 0) { 03463 start = 0; 03464 } else if(start > len) { 03465 start = len; 03466 } 03467 if(_length < 0) { 03468 _length = 0; 03469 } else if(_length > (len - start)) { 03470 _length = (len - start); 03471 } 03472 } 03473 03474 inline UChar* 03475 UnicodeString::getArrayStart() 03476 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03477 03478 inline const UChar* 03479 UnicodeString::getArrayStart() const 03480 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03481 03482 //======================================== 03483 // Read-only implementation methods 03484 //======================================== 03485 inline int32_t 03486 UnicodeString::length() const 03487 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 03488 03489 inline int32_t 03490 UnicodeString::getCapacity() const 03491 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 03492 03493 inline int32_t 03494 UnicodeString::hashCode() const 03495 { return doHashCode(); } 03496 03497 inline UBool 03498 UnicodeString::isBogus() const 03499 { return (UBool)(fFlags & kIsBogus); } 03500 03501 inline UBool 03502 UnicodeString::isWritable() const 03503 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 03504 03505 inline UBool 03506 UnicodeString::isBufferWritable() const 03507 { 03508 return (UBool)( 03509 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 03510 (!(fFlags&kRefCounted) || refCount()==1)); 03511 } 03512 03513 inline const UChar * 03514 UnicodeString::getBuffer() const { 03515 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 03516 return 0; 03517 } else if(fFlags&kUsingStackBuffer) { 03518 return fUnion.fStackBuffer; 03519 } else { 03520 return fUnion.fFields.fArray; 03521 } 03522 } 03523 03524 //======================================== 03525 // Read-only alias methods 03526 //======================================== 03527 inline int8_t 03528 UnicodeString::doCompare(int32_t start, 03529 int32_t thisLength, 03530 const UnicodeString& srcText, 03531 int32_t srcStart, 03532 int32_t srcLength) const 03533 { 03534 if(srcText.isBogus()) { 03535 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03536 } else { 03537 srcText.pinIndices(srcStart, srcLength); 03538 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03539 } 03540 } 03541 03542 inline UBool 03543 UnicodeString::operator== (const UnicodeString& text) const 03544 { 03545 if(isBogus()) { 03546 return text.isBogus(); 03547 } else { 03548 int32_t len = length(), textLength = text.length(); 03549 return 03550 !text.isBogus() && 03551 len == textLength && 03552 doCompare(0, len, text, 0, textLength) == 0; 03553 } 03554 } 03555 03556 inline UBool 03557 UnicodeString::operator!= (const UnicodeString& text) const 03558 { return (! operator==(text)); } 03559 03560 inline UBool 03561 UnicodeString::operator> (const UnicodeString& text) const 03562 { return doCompare(0, length(), text, 0, text.length()) == 1; } 03563 03564 inline UBool 03565 UnicodeString::operator< (const UnicodeString& text) const 03566 { return doCompare(0, length(), text, 0, text.length()) == -1; } 03567 03568 inline UBool 03569 UnicodeString::operator>= (const UnicodeString& text) const 03570 { return doCompare(0, length(), text, 0, text.length()) != -1; } 03571 03572 inline UBool 03573 UnicodeString::operator<= (const UnicodeString& text) const 03574 { return doCompare(0, length(), text, 0, text.length()) != 1; } 03575 03576 inline int8_t 03577 UnicodeString::compare(const UnicodeString& text) const 03578 { return doCompare(0, length(), text, 0, text.length()); } 03579 03580 inline int8_t 03581 UnicodeString::compare(int32_t start, 03582 int32_t _length, 03583 const UnicodeString& srcText) const 03584 { return doCompare(start, _length, srcText, 0, srcText.length()); } 03585 03586 inline int8_t 03587 UnicodeString::compare(const UChar *srcChars, 03588 int32_t srcLength) const 03589 { return doCompare(0, length(), srcChars, 0, srcLength); } 03590 03591 inline int8_t 03592 UnicodeString::compare(int32_t start, 03593 int32_t _length, 03594 const UnicodeString& srcText, 03595 int32_t srcStart, 03596 int32_t srcLength) const 03597 { return doCompare(start, _length, srcText, srcStart, srcLength); } 03598 03599 inline int8_t 03600 UnicodeString::compare(int32_t start, 03601 int32_t _length, 03602 const UChar *srcChars) const 03603 { return doCompare(start, _length, srcChars, 0, _length); } 03604 03605 inline int8_t 03606 UnicodeString::compare(int32_t start, 03607 int32_t _length, 03608 const UChar *srcChars, 03609 int32_t srcStart, 03610 int32_t srcLength) const 03611 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 03612 03613 inline int8_t 03614 UnicodeString::compareBetween(int32_t start, 03615 int32_t limit, 03616 const UnicodeString& srcText, 03617 int32_t srcStart, 03618 int32_t srcLimit) const 03619 { return doCompare(start, limit - start, 03620 srcText, srcStart, srcLimit - srcStart); } 03621 03622 inline int8_t 03623 UnicodeString::doCompareCodePointOrder(int32_t start, 03624 int32_t thisLength, 03625 const UnicodeString& srcText, 03626 int32_t srcStart, 03627 int32_t srcLength) const 03628 { 03629 if(srcText.isBogus()) { 03630 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03631 } else { 03632 srcText.pinIndices(srcStart, srcLength); 03633 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03634 } 03635 } 03636 03637 inline int8_t 03638 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 03639 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 03640 03641 inline int8_t 03642 UnicodeString::compareCodePointOrder(int32_t start, 03643 int32_t _length, 03644 const UnicodeString& srcText) const 03645 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 03646 03647 inline int8_t 03648 UnicodeString::compareCodePointOrder(const UChar *srcChars, 03649 int32_t srcLength) const 03650 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 03651 03652 inline int8_t 03653 UnicodeString::compareCodePointOrder(int32_t start, 03654 int32_t _length, 03655 const UnicodeString& srcText, 03656 int32_t srcStart, 03657 int32_t srcLength) const 03658 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 03659 03660 inline int8_t 03661 UnicodeString::compareCodePointOrder(int32_t start, 03662 int32_t _length, 03663 const UChar *srcChars) const 03664 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 03665 03666 inline int8_t 03667 UnicodeString::compareCodePointOrder(int32_t start, 03668 int32_t _length, 03669 const UChar *srcChars, 03670 int32_t srcStart, 03671 int32_t srcLength) const 03672 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 03673 03674 inline int8_t 03675 UnicodeString::compareCodePointOrderBetween(int32_t start, 03676 int32_t limit, 03677 const UnicodeString& srcText, 03678 int32_t srcStart, 03679 int32_t srcLimit) const 03680 { return doCompareCodePointOrder(start, limit - start, 03681 srcText, srcStart, srcLimit - srcStart); } 03682 03683 inline int8_t 03684 UnicodeString::doCaseCompare(int32_t start, 03685 int32_t thisLength, 03686 const UnicodeString &srcText, 03687 int32_t srcStart, 03688 int32_t srcLength, 03689 uint32_t options) const 03690 { 03691 if(srcText.isBogus()) { 03692 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03693 } else { 03694 srcText.pinIndices(srcStart, srcLength); 03695 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 03696 } 03697 } 03698 03699 inline int8_t 03700 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 03701 return doCaseCompare(0, length(), text, 0, text.length(), options); 03702 } 03703 03704 inline int8_t 03705 UnicodeString::caseCompare(int32_t start, 03706 int32_t _length, 03707 const UnicodeString &srcText, 03708 uint32_t options) const { 03709 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 03710 } 03711 03712 inline int8_t 03713 UnicodeString::caseCompare(const UChar *srcChars, 03714 int32_t srcLength, 03715 uint32_t options) const { 03716 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 03717 } 03718 03719 inline int8_t 03720 UnicodeString::caseCompare(int32_t start, 03721 int32_t _length, 03722 const UnicodeString &srcText, 03723 int32_t srcStart, 03724 int32_t srcLength, 03725 uint32_t options) const { 03726 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 03727 } 03728 03729 inline int8_t 03730 UnicodeString::caseCompare(int32_t start, 03731 int32_t _length, 03732 const UChar *srcChars, 03733 uint32_t options) const { 03734 return doCaseCompare(start, _length, srcChars, 0, _length, options); 03735 } 03736 03737 inline int8_t 03738 UnicodeString::caseCompare(int32_t start, 03739 int32_t _length, 03740 const UChar *srcChars, 03741 int32_t srcStart, 03742 int32_t srcLength, 03743 uint32_t options) const { 03744 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 03745 } 03746 03747 inline int8_t 03748 UnicodeString::caseCompareBetween(int32_t start, 03749 int32_t limit, 03750 const UnicodeString &srcText, 03751 int32_t srcStart, 03752 int32_t srcLimit, 03753 uint32_t options) const { 03754 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 03755 } 03756 03757 inline int32_t 03758 UnicodeString::indexOf(const UnicodeString& srcText, 03759 int32_t srcStart, 03760 int32_t srcLength, 03761 int32_t start, 03762 int32_t _length) const 03763 { 03764 if(!srcText.isBogus()) { 03765 srcText.pinIndices(srcStart, srcLength); 03766 if(srcLength > 0) { 03767 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03768 } 03769 } 03770 return -1; 03771 } 03772 03773 inline int32_t 03774 UnicodeString::indexOf(const UnicodeString& text) const 03775 { return indexOf(text, 0, text.length(), 0, length()); } 03776 03777 inline int32_t 03778 UnicodeString::indexOf(const UnicodeString& text, 03779 int32_t start) const { 03780 pinIndex(start); 03781 return indexOf(text, 0, text.length(), start, length() - start); 03782 } 03783 03784 inline int32_t 03785 UnicodeString::indexOf(const UnicodeString& text, 03786 int32_t start, 03787 int32_t _length) const 03788 { return indexOf(text, 0, text.length(), start, _length); } 03789 03790 inline int32_t 03791 UnicodeString::indexOf(const UChar *srcChars, 03792 int32_t srcLength, 03793 int32_t start) const { 03794 pinIndex(start); 03795 return indexOf(srcChars, 0, srcLength, start, length() - start); 03796 } 03797 03798 inline int32_t 03799 UnicodeString::indexOf(const UChar *srcChars, 03800 int32_t srcLength, 03801 int32_t start, 03802 int32_t _length) const 03803 { return indexOf(srcChars, 0, srcLength, start, _length); } 03804 03805 inline int32_t 03806 UnicodeString::indexOf(UChar c, 03807 int32_t start, 03808 int32_t _length) const 03809 { return doIndexOf(c, start, _length); } 03810 03811 inline int32_t 03812 UnicodeString::indexOf(UChar32 c, 03813 int32_t start, 03814 int32_t _length) const 03815 { return doIndexOf(c, start, _length); } 03816 03817 inline int32_t 03818 UnicodeString::indexOf(UChar c) const 03819 { return doIndexOf(c, 0, length()); } 03820 03821 inline int32_t 03822 UnicodeString::indexOf(UChar32 c) const 03823 { return indexOf(c, 0, length()); } 03824 03825 inline int32_t 03826 UnicodeString::indexOf(UChar c, 03827 int32_t start) const { 03828 pinIndex(start); 03829 return doIndexOf(c, start, length() - start); 03830 } 03831 03832 inline int32_t 03833 UnicodeString::indexOf(UChar32 c, 03834 int32_t start) const { 03835 pinIndex(start); 03836 return indexOf(c, start, length() - start); 03837 } 03838 03839 inline int32_t 03840 UnicodeString::lastIndexOf(const UChar *srcChars, 03841 int32_t srcLength, 03842 int32_t start, 03843 int32_t _length) const 03844 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 03845 03846 inline int32_t 03847 UnicodeString::lastIndexOf(const UChar *srcChars, 03848 int32_t srcLength, 03849 int32_t start) const { 03850 pinIndex(start); 03851 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 03852 } 03853 03854 inline int32_t 03855 UnicodeString::lastIndexOf(const UnicodeString& srcText, 03856 int32_t srcStart, 03857 int32_t srcLength, 03858 int32_t start, 03859 int32_t _length) const 03860 { 03861 if(!srcText.isBogus()) { 03862 srcText.pinIndices(srcStart, srcLength); 03863 if(srcLength > 0) { 03864 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03865 } 03866 } 03867 return -1; 03868 } 03869 03870 inline int32_t 03871 UnicodeString::lastIndexOf(const UnicodeString& text, 03872 int32_t start, 03873 int32_t _length) const 03874 { return lastIndexOf(text, 0, text.length(), start, _length); } 03875 03876 inline int32_t 03877 UnicodeString::lastIndexOf(const UnicodeString& text, 03878 int32_t start) const { 03879 pinIndex(start); 03880 return lastIndexOf(text, 0, text.length(), start, length() - start); 03881 } 03882 03883 inline int32_t 03884 UnicodeString::lastIndexOf(const UnicodeString& text) const 03885 { return lastIndexOf(text, 0, text.length(), 0, length()); } 03886 03887 inline int32_t 03888 UnicodeString::lastIndexOf(UChar c, 03889 int32_t start, 03890 int32_t _length) const 03891 { return doLastIndexOf(c, start, _length); } 03892 03893 inline int32_t 03894 UnicodeString::lastIndexOf(UChar32 c, 03895 int32_t start, 03896 int32_t _length) const { 03897 return doLastIndexOf(c, start, _length); 03898 } 03899 03900 inline int32_t 03901 UnicodeString::lastIndexOf(UChar c) const 03902 { return doLastIndexOf(c, 0, length()); } 03903 03904 inline int32_t 03905 UnicodeString::lastIndexOf(UChar32 c) const { 03906 return lastIndexOf(c, 0, length()); 03907 } 03908 03909 inline int32_t 03910 UnicodeString::lastIndexOf(UChar c, 03911 int32_t start) const { 03912 pinIndex(start); 03913 return doLastIndexOf(c, start, length() - start); 03914 } 03915 03916 inline int32_t 03917 UnicodeString::lastIndexOf(UChar32 c, 03918 int32_t start) const { 03919 pinIndex(start); 03920 return lastIndexOf(c, start, length() - start); 03921 } 03922 03923 inline UBool 03924 UnicodeString::startsWith(const UnicodeString& text) const 03925 { return compare(0, text.length(), text, 0, text.length()) == 0; } 03926 03927 inline UBool 03928 UnicodeString::startsWith(const UnicodeString& srcText, 03929 int32_t srcStart, 03930 int32_t srcLength) const 03931 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 03932 03933 inline UBool 03934 UnicodeString::startsWith(const UChar *srcChars, 03935 int32_t srcLength) const 03936 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } 03937 03938 inline UBool 03939 UnicodeString::startsWith(const UChar *srcChars, 03940 int32_t srcStart, 03941 int32_t srcLength) const 03942 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;} 03943 03944 inline UBool 03945 UnicodeString::endsWith(const UnicodeString& text) const 03946 { return doCompare(length() - text.length(), text.length(), 03947 text, 0, text.length()) == 0; } 03948 03949 inline UBool 03950 UnicodeString::endsWith(const UnicodeString& srcText, 03951 int32_t srcStart, 03952 int32_t srcLength) const { 03953 srcText.pinIndices(srcStart, srcLength); 03954 return doCompare(length() - srcLength, srcLength, 03955 srcText, srcStart, srcLength) == 0; 03956 } 03957 03958 inline UBool 03959 UnicodeString::endsWith(const UChar *srcChars, 03960 int32_t srcLength) const { 03961 if(srcLength < 0) { 03962 srcLength = u_strlen(srcChars); 03963 } 03964 return doCompare(length() - srcLength, srcLength, 03965 srcChars, 0, srcLength) == 0; 03966 } 03967 03968 inline UBool 03969 UnicodeString::endsWith(const UChar *srcChars, 03970 int32_t srcStart, 03971 int32_t srcLength) const { 03972 if(srcLength < 0) { 03973 srcLength = u_strlen(srcChars + srcStart); 03974 } 03975 return doCompare(length() - srcLength, srcLength, 03976 srcChars, srcStart, srcLength) == 0; 03977 } 03978 03979 //======================================== 03980 // replace 03981 //======================================== 03982 inline UnicodeString& 03983 UnicodeString::replace(int32_t start, 03984 int32_t _length, 03985 const UnicodeString& srcText) 03986 { return doReplace(start, _length, srcText, 0, srcText.length()); } 03987 03988 inline UnicodeString& 03989 UnicodeString::replace(int32_t start, 03990 int32_t _length, 03991 const UnicodeString& srcText, 03992 int32_t srcStart, 03993 int32_t srcLength) 03994 { return doReplace(start, _length, srcText, srcStart, srcLength); } 03995 03996 inline UnicodeString& 03997 UnicodeString::replace(int32_t start, 03998 int32_t _length, 03999 const UChar *srcChars, 04000 int32_t srcLength) 04001 { return doReplace(start, _length, srcChars, 0, srcLength); } 04002 04003 inline UnicodeString& 04004 UnicodeString::replace(int32_t start, 04005 int32_t _length, 04006 const UChar *srcChars, 04007 int32_t srcStart, 04008 int32_t srcLength) 04009 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 04010 04011 inline UnicodeString& 04012 UnicodeString::replace(int32_t start, 04013 int32_t _length, 04014 UChar srcChar) 04015 { return doReplace(start, _length, &srcChar, 0, 1); } 04016 04017 inline UnicodeString& 04018 UnicodeString::replace(int32_t start, 04019 int32_t _length, 04020 UChar32 srcChar) { 04021 UChar buffer[U16_MAX_LENGTH]; 04022 int32_t count = 0; 04023 UBool isError = FALSE; 04024 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 04025 return doReplace(start, _length, buffer, 0, count); 04026 } 04027 04028 inline UnicodeString& 04029 UnicodeString::replaceBetween(int32_t start, 04030 int32_t limit, 04031 const UnicodeString& srcText) 04032 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 04033 04034 inline UnicodeString& 04035 UnicodeString::replaceBetween(int32_t start, 04036 int32_t limit, 04037 const UnicodeString& srcText, 04038 int32_t srcStart, 04039 int32_t srcLimit) 04040 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 04041 04042 inline UnicodeString& 04043 UnicodeString::findAndReplace(const UnicodeString& oldText, 04044 const UnicodeString& newText) 04045 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 04046 newText, 0, newText.length()); } 04047 04048 inline UnicodeString& 04049 UnicodeString::findAndReplace(int32_t start, 04050 int32_t _length, 04051 const UnicodeString& oldText, 04052 const UnicodeString& newText) 04053 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 04054 newText, 0, newText.length()); } 04055 04056 // ============================ 04057 // extract 04058 // ============================ 04059 inline void 04060 UnicodeString::doExtract(int32_t start, 04061 int32_t _length, 04062 UnicodeString& target) const 04063 { target.replace(0, target.length(), *this, start, _length); } 04064 04065 inline void 04066 UnicodeString::extract(int32_t start, 04067 int32_t _length, 04068 UChar *target, 04069 int32_t targetStart) const 04070 { doExtract(start, _length, target, targetStart); } 04071 04072 inline void 04073 UnicodeString::extract(int32_t start, 04074 int32_t _length, 04075 UnicodeString& target) const 04076 { doExtract(start, _length, target); } 04077 04078 #if !UCONFIG_NO_CONVERSION 04079 04080 inline int32_t 04081 UnicodeString::extract(int32_t start, 04082 int32_t _length, 04083 char *dst, 04084 const char *codepage) const 04085 04086 { 04087 // This dstSize value will be checked explicitly 04088 #if defined(__GNUC__) 04089 // Ticket #7039: Clip length to the maximum valid length to the end of addressable memory given the starting address 04090 // This is only an issue when using GCC and certain optimizations are turned on. 04091 return extract(start, _length, dst, dst!=0 ? ((dst >= (char*)((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage); 04092 #else 04093 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 04094 #endif 04095 } 04096 04097 #endif 04098 04099 inline void 04100 UnicodeString::extractBetween(int32_t start, 04101 int32_t limit, 04102 UChar *dst, 04103 int32_t dstStart) const { 04104 pinIndex(start); 04105 pinIndex(limit); 04106 doExtract(start, limit - start, dst, dstStart); 04107 } 04108 04109 inline UnicodeString 04110 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 04111 return tempSubString(start, limit - start); 04112 } 04113 04114 inline UChar 04115 UnicodeString::doCharAt(int32_t offset) const 04116 { 04117 if((uint32_t)offset < (uint32_t)length()) { 04118 return getArrayStart()[offset]; 04119 } else { 04120 return kInvalidUChar; 04121 } 04122 } 04123 04124 inline UChar 04125 UnicodeString::charAt(int32_t offset) const 04126 { return doCharAt(offset); } 04127 04128 inline UChar 04129 UnicodeString::operator[] (int32_t offset) const 04130 { return doCharAt(offset); } 04131 04132 inline UChar32 04133 UnicodeString::char32At(int32_t offset) const 04134 { 04135 int32_t len = length(); 04136 if((uint32_t)offset < (uint32_t)len) { 04137 const UChar *array = getArrayStart(); 04138 UChar32 c; 04139 U16_GET(array, 0, offset, len, c); 04140 return c; 04141 } else { 04142 return kInvalidUChar; 04143 } 04144 } 04145 04146 inline int32_t 04147 UnicodeString::getChar32Start(int32_t offset) const { 04148 if((uint32_t)offset < (uint32_t)length()) { 04149 const UChar *array = getArrayStart(); 04150 U16_SET_CP_START(array, 0, offset); 04151 return offset; 04152 } else { 04153 return 0; 04154 } 04155 } 04156 04157 inline int32_t 04158 UnicodeString::getChar32Limit(int32_t offset) const { 04159 int32_t len = length(); 04160 if((uint32_t)offset < (uint32_t)len) { 04161 const UChar *array = getArrayStart(); 04162 U16_SET_CP_LIMIT(array, 0, offset, len); 04163 return offset; 04164 } else { 04165 return len; 04166 } 04167 } 04168 04169 inline UBool 04170 UnicodeString::isEmpty() const { 04171 return fShortLength == 0; 04172 } 04173 04174 //======================================== 04175 // Write implementation methods 04176 //======================================== 04177 inline void 04178 UnicodeString::setLength(int32_t len) { 04179 if(len <= 127) { 04180 fShortLength = (int8_t)len; 04181 } else { 04182 fShortLength = (int8_t)-1; 04183 fUnion.fFields.fLength = len; 04184 } 04185 } 04186 04187 inline void 04188 UnicodeString::setToEmpty() { 04189 fShortLength = 0; 04190 fFlags = kShortString; 04191 } 04192 04193 inline void 04194 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 04195 setLength(len); 04196 fUnion.fFields.fArray = array; 04197 fUnion.fFields.fCapacity = capacity; 04198 } 04199 04200 inline const UChar * 04201 UnicodeString::getTerminatedBuffer() { 04202 if(!isWritable()) { 04203 return 0; 04204 } else { 04205 UChar *array = getArrayStart(); 04206 int32_t len = length(); 04207 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { 04208 /* 04209 * kRefCounted: Do not write the NUL if the buffer is shared. 04210 * That is mostly safe, except when the length of one copy was modified 04211 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 04212 * Then the NUL would be written into the middle of another copy's string. 04213 */ 04214 if(!(fFlags&kBufferIsReadonly)) { 04215 /* 04216 * We must not write to a readonly buffer, but it is known to be 04217 * NUL-terminated if len<capacity. 04218 * A shared, allocated buffer (refCount()>1) must not have its contents 04219 * modified, but the NUL at [len] is beyond the string contents, 04220 * and multiple string objects and threads writing the same NUL into the 04221 * same location is harmless. 04222 * In all other cases, the buffer is fully writable and it is anyway safe 04223 * to write the NUL. 04224 * 04225 * Note: An earlier version of this code tested whether there is a NUL 04226 * at [len] already, but, while safe, it generated lots of warnings from 04227 * tools like valgrind and Purify. 04228 */ 04229 array[len] = 0; 04230 } 04231 return array; 04232 } else if(cloneArrayIfNeeded(len+1)) { 04233 array = getArrayStart(); 04234 array[len] = 0; 04235 return array; 04236 } else { 04237 return 0; 04238 } 04239 } 04240 } 04241 04242 inline UnicodeString& 04243 UnicodeString::operator= (UChar ch) 04244 { return doReplace(0, length(), &ch, 0, 1); } 04245 04246 inline UnicodeString& 04247 UnicodeString::operator= (UChar32 ch) 04248 { return replace(0, length(), ch); } 04249 04250 inline UnicodeString& 04251 UnicodeString::setTo(const UnicodeString& srcText, 04252 int32_t srcStart, 04253 int32_t srcLength) 04254 { 04255 unBogus(); 04256 return doReplace(0, length(), srcText, srcStart, srcLength); 04257 } 04258 04259 inline UnicodeString& 04260 UnicodeString::setTo(const UnicodeString& srcText, 04261 int32_t srcStart) 04262 { 04263 unBogus(); 04264 srcText.pinIndex(srcStart); 04265 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 04266 } 04267 04268 inline UnicodeString& 04269 UnicodeString::setTo(const UnicodeString& srcText) 04270 { 04271 unBogus(); 04272 return doReplace(0, length(), srcText, 0, srcText.length()); 04273 } 04274 04275 inline UnicodeString& 04276 UnicodeString::setTo(const UChar *srcChars, 04277 int32_t srcLength) 04278 { 04279 unBogus(); 04280 return doReplace(0, length(), srcChars, 0, srcLength); 04281 } 04282 04283 inline UnicodeString& 04284 UnicodeString::setTo(UChar srcChar) 04285 { 04286 unBogus(); 04287 return doReplace(0, length(), &srcChar, 0, 1); 04288 } 04289 04290 inline UnicodeString& 04291 UnicodeString::setTo(UChar32 srcChar) 04292 { 04293 unBogus(); 04294 return replace(0, length(), srcChar); 04295 } 04296 04297 inline UnicodeString& 04298 UnicodeString::append(const UnicodeString& srcText, 04299 int32_t srcStart, 04300 int32_t srcLength) 04301 { return doReplace(length(), 0, srcText, srcStart, srcLength); } 04302 04303 inline UnicodeString& 04304 UnicodeString::append(const UnicodeString& srcText) 04305 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04306 04307 inline UnicodeString& 04308 UnicodeString::append(const UChar *srcChars, 04309 int32_t srcStart, 04310 int32_t srcLength) 04311 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 04312 04313 inline UnicodeString& 04314 UnicodeString::append(const UChar *srcChars, 04315 int32_t srcLength) 04316 { return doReplace(length(), 0, srcChars, 0, srcLength); } 04317 04318 inline UnicodeString& 04319 UnicodeString::append(UChar srcChar) 04320 { return doReplace(length(), 0, &srcChar, 0, 1); } 04321 04322 inline UnicodeString& 04323 UnicodeString::append(UChar32 srcChar) { 04324 UChar buffer[U16_MAX_LENGTH]; 04325 int32_t _length = 0; 04326 UBool isError = FALSE; 04327 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 04328 return doReplace(length(), 0, buffer, 0, _length); 04329 } 04330 04331 inline UnicodeString& 04332 UnicodeString::operator+= (UChar ch) 04333 { return doReplace(length(), 0, &ch, 0, 1); } 04334 04335 inline UnicodeString& 04336 UnicodeString::operator+= (UChar32 ch) { 04337 return append(ch); 04338 } 04339 04340 inline UnicodeString& 04341 UnicodeString::operator+= (const UnicodeString& srcText) 04342 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04343 04344 inline UnicodeString& 04345 UnicodeString::insert(int32_t start, 04346 const UnicodeString& srcText, 04347 int32_t srcStart, 04348 int32_t srcLength) 04349 { return doReplace(start, 0, srcText, srcStart, srcLength); } 04350 04351 inline UnicodeString& 04352 UnicodeString::insert(int32_t start, 04353 const UnicodeString& srcText) 04354 { return doReplace(start, 0, srcText, 0, srcText.length()); } 04355 04356 inline UnicodeString& 04357 UnicodeString::insert(int32_t start, 04358 const UChar *srcChars, 04359 int32_t srcStart, 04360 int32_t srcLength) 04361 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 04362 04363 inline UnicodeString& 04364 UnicodeString::insert(int32_t start, 04365 const UChar *srcChars, 04366 int32_t srcLength) 04367 { return doReplace(start, 0, srcChars, 0, srcLength); } 04368 04369 inline UnicodeString& 04370 UnicodeString::insert(int32_t start, 04371 UChar srcChar) 04372 { return doReplace(start, 0, &srcChar, 0, 1); } 04373 04374 inline UnicodeString& 04375 UnicodeString::insert(int32_t start, 04376 UChar32 srcChar) 04377 { return replace(start, 0, srcChar); } 04378 04379 04380 inline UnicodeString& 04381 UnicodeString::remove() 04382 { 04383 // remove() of a bogus string makes the string empty and non-bogus 04384 // we also un-alias a read-only alias to deal with NUL-termination 04385 // issues with getTerminatedBuffer() 04386 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 04387 setToEmpty(); 04388 } else { 04389 fShortLength = 0; 04390 } 04391 return *this; 04392 } 04393 04394 inline UnicodeString& 04395 UnicodeString::remove(int32_t start, 04396 int32_t _length) 04397 { 04398 if(start <= 0 && _length == INT32_MAX) { 04399 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 04400 return remove(); 04401 } 04402 return doReplace(start, _length, NULL, 0, 0); 04403 } 04404 04405 inline UnicodeString& 04406 UnicodeString::removeBetween(int32_t start, 04407 int32_t limit) 04408 { return doReplace(start, limit - start, NULL, 0, 0); } 04409 04410 inline UnicodeString & 04411 UnicodeString::retainBetween(int32_t start, int32_t limit) { 04412 truncate(limit); 04413 return doReplace(0, start, NULL, 0, 0); 04414 } 04415 04416 inline UBool 04417 UnicodeString::truncate(int32_t targetLength) 04418 { 04419 if(isBogus() && targetLength == 0) { 04420 // truncate(0) of a bogus string makes the string empty and non-bogus 04421 unBogus(); 04422 return FALSE; 04423 } else if((uint32_t)targetLength < (uint32_t)length()) { 04424 setLength(targetLength); 04425 if(fFlags&kBufferIsReadonly) { 04426 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 04427 } 04428 return TRUE; 04429 } else { 04430 return FALSE; 04431 } 04432 } 04433 04434 inline UnicodeString& 04435 UnicodeString::reverse() 04436 { return doReverse(0, length()); } 04437 04438 inline UnicodeString& 04439 UnicodeString::reverse(int32_t start, 04440 int32_t _length) 04441 { return doReverse(start, _length); } 04442 04443 U_NAMESPACE_END 04444 04445 #endif