00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011
00012 #include "unicode/utypes.h"
00013
00019 #if !UCONFIG_NO_NORMALIZATION
00020
00021 #include "unicode/uobject.h"
00022 #include "unicode/unistr.h"
00023 #include "unicode/chariter.h"
00024 #include "unicode/unorm.h"
00025
00026
/*
 * Forward declaration of UCharIterator (plus the C-style typedef of the same
 * name). Only the incomplete type is needed in this header: the Normalizer
 * class below stores a UCharIterator pointer in its private `text` member.
 */
00027 struct UCharIterator;
00028 typedef struct UCharIterator UCharIterator;
00030 U_NAMESPACE_BEGIN
00120 class U_COMMON_API Normalizer : public UObject {
00121 public:
00127 enum {
00128 DONE=0xffff
00129 };
00130
00131
00132
00143 Normalizer(const UnicodeString& str, UNormalizationMode mode);
00144
00156 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00157
00168 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00169
00175 Normalizer(const Normalizer& copy);
00176
00181 virtual ~Normalizer();
00182
00183
00184
00185
00186
00187
00202 static void U_EXPORT2 normalize(const UnicodeString& source,
00203 UNormalizationMode mode, int32_t options,
00204 UnicodeString& result,
00205 UErrorCode &status);
00206
00224 static void U_EXPORT2 compose(const UnicodeString& source,
00225 UBool compat, int32_t options,
00226 UnicodeString& result,
00227 UErrorCode &status);
00228
00246 static void U_EXPORT2 decompose(const UnicodeString& source,
00247 UBool compat, int32_t options,
00248 UnicodeString& result,
00249 UErrorCode &status);
00250
00271 static inline UNormalizationCheckResult
00272 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00273
00287 static inline UNormalizationCheckResult
00288 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00289
00310 static inline UBool
00311 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00312
00328 static inline UBool
00329 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00330
00360 static UnicodeString &
00361 U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
00362 UnicodeString &result,
00363 UNormalizationMode mode, int32_t options,
00364 UErrorCode &errorCode);
00365
00430 static inline int32_t
00431 compare(const UnicodeString &s1, const UnicodeString &s2,
00432 uint32_t options,
00433 UErrorCode &errorCode);
00434
00435
00436
00437
00438
00447 UChar32 current(void);
00448
00457 UChar32 first(void);
00458
00467 UChar32 last(void);
00468
00483 UChar32 next(void);
00484
00499 UChar32 previous(void);
00500
00510 void setIndexOnly(int32_t index);
00511
00517 void reset(void);
00518
00533 int32_t getIndex(void) const;
00534
00543 int32_t startIndex(void) const;
00544
00555 int32_t endIndex(void) const;
00556
00565 UBool operator==(const Normalizer& that) const;
00566
00575 inline UBool operator!=(const Normalizer& that) const;
00576
00583 Normalizer* clone(void) const;
00584
00591 int32_t hashCode(void) const;
00592
00593
00594
00595
00596
00612 void setMode(UNormalizationMode newMode);
00613
00624 UNormalizationMode getUMode(void) const;
00625
00642 void setOption(int32_t option,
00643 UBool value);
00644
00655 UBool getOption(int32_t option) const;
00656
00665 void setText(const UnicodeString& newText,
00666 UErrorCode &status);
00667
00676 void setText(const CharacterIterator& newText,
00677 UErrorCode &status);
00678
00688 void setText(const UChar* newText,
00689 int32_t length,
00690 UErrorCode &status);
00697 void getText(UnicodeString& result);
00698
00704 static UClassID U_EXPORT2 getStaticClassID();
00705
00711 virtual UClassID getDynamicClassID() const;
00712
00713 private:
00714
00715
00716
00717
00718 Normalizer();
00719 Normalizer &operator=(const Normalizer &that);
00720
00721
00722
00723 UBool nextNormalize();
00724 UBool previousNormalize();
00725
00726 void init(CharacterIterator *iter);
00727 void clearBuffer(void);
00728
00729
00730
00731
00732
00733 UNormalizationMode fUMode;
00734 int32_t fOptions;
00735
00736
00737 UCharIterator *text;
00738
00739
00740
00741 int32_t currentIndex, nextIndex;
00742
00743
00744 UnicodeString buffer;
00745 int32_t bufferPos;
00746
00747 };
00748
00749
00750
00751
00752
00753 inline UBool
00754 Normalizer::operator!= (const Normalizer& other) const
00755 { return ! operator==(other); }
00756
00757 inline UNormalizationCheckResult
00758 Normalizer::quickCheck(const UnicodeString& source,
00759 UNormalizationMode mode,
00760 UErrorCode &status) {
00761 if(U_FAILURE(status)) {
00762 return UNORM_MAYBE;
00763 }
00764
00765 return unorm_quickCheck(source.getBuffer(), source.length(),
00766 mode, &status);
00767 }
00768
00769 inline UNormalizationCheckResult
00770 Normalizer::quickCheck(const UnicodeString& source,
00771 UNormalizationMode mode, int32_t options,
00772 UErrorCode &status) {
00773 if(U_FAILURE(status)) {
00774 return UNORM_MAYBE;
00775 }
00776
00777 return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
00778 mode, options, &status);
00779 }
00780
00781 inline UBool
00782 Normalizer::isNormalized(const UnicodeString& source,
00783 UNormalizationMode mode,
00784 UErrorCode &status) {
00785 if(U_FAILURE(status)) {
00786 return FALSE;
00787 }
00788
00789 return unorm_isNormalized(source.getBuffer(), source.length(),
00790 mode, &status);
00791 }
00792
00793 inline UBool
00794 Normalizer::isNormalized(const UnicodeString& source,
00795 UNormalizationMode mode, int32_t options,
00796 UErrorCode &status) {
00797 if(U_FAILURE(status)) {
00798 return FALSE;
00799 }
00800
00801 return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
00802 mode, options, &status);
00803 }
00804
00805 inline int32_t
00806 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00807 uint32_t options,
00808 UErrorCode &errorCode) {
00809
00810 return unorm_compare(s1.getBuffer(), s1.length(),
00811 s2.getBuffer(), s2.length(),
00812 options,
00813 &errorCode);
00814 }
00815
00816 U_NAMESPACE_END
00817
00818 #endif
00819
00820 #endif // NORMLZR_H