uset.h File Reference

C API: Unicode Set. More...

#include "unicode/utypes.h"
#include "unicode/uchar.h"

Go to the source code of this file.

Data Structures

struct  USerializedSet
 A serialized form of a Unicode set. More...

Typedefs

typedef struct USet USet
 A UnicodeSet.
typedef struct USerializedSet USerializedSet
 A serialized form of a Unicode set.

Enumerations

enum  {
  USET_IGNORE_SPACE = 1, USET_CASE_INSENSITIVE = 2, USET_CASE = 2, USET_ADD_CASE_MAPPINGS = 4,
  USET_SERIALIZED_STATIC_ARRAY_CAPACITY = 8
}
 Bitmask values to be passed to uset_openPatternOptions() or uset_applyPattern() taking an option parameter. More...

Functions

U_STABLE USet *U_EXPORT2 uset_open (UChar32 start, UChar32 end)
 Creates a USet object that contains the range of characters start..end, inclusive.
U_STABLE USet *U_EXPORT2 uset_openPattern (const UChar *pattern, int32_t patternLength, UErrorCode *ec)
 Creates a set from the given pattern.
U_STABLE USet *U_EXPORT2 uset_openPatternOptions (const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
 Creates a set from the given pattern.
U_STABLE void U_EXPORT2 uset_close (USet *set)
 Disposes of the storage used by a USet object.
U_DRAFT void U_EXPORT2 uset_set (USet *set, UChar32 start, UChar32 end)
 Causes the USet object to represent the range start - end.
U_STABLE int32_t U_EXPORT2 uset_applyPattern (USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
 Modifies the set to represent the set specified by the given pattern.
U_DRAFT void U_EXPORT2 uset_applyIntPropertyValue (USet *set, UProperty prop, int32_t value, UErrorCode *ec)
 Modifies the set to contain those code points which have the given value for the given binary or enumerated property, as returned by u_getIntPropertyValue.
U_DRAFT void U_EXPORT2 uset_applyPropertyAlias (USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
 Modifies the set to contain those code points which have the given value for the given property.
U_DRAFT UBool U_EXPORT2 uset_resemblesPattern (const UChar *pattern, int32_t patternLength, int32_t pos)
 Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
U_STABLE int32_t U_EXPORT2 uset_toPattern (const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
 Returns a string representation of this set.
U_STABLE void U_EXPORT2 uset_add (USet *set, UChar32 c)
 Adds the given character to the given USet.
U_STABLE void U_EXPORT2 uset_addAll (USet *set, const USet *additionalSet)
 Adds all of the elements in the specified set to this set if they're not already present.
U_STABLE void U_EXPORT2 uset_addRange (USet *set, UChar32 start, UChar32 end)
 Adds the given range of characters to the given USet.
U_STABLE void U_EXPORT2 uset_addString (USet *set, const UChar *str, int32_t strLen)
 Adds the given string to the given USet.
U_DRAFT void U_EXPORT2 uset_addAllCodePoints (USet *set, const UChar *str, int32_t strLen)
 Adds each of the characters in this string to the set.
U_STABLE void U_EXPORT2 uset_remove (USet *set, UChar32 c)
 Removes the given character from the given USet.
U_STABLE void U_EXPORT2 uset_removeRange (USet *set, UChar32 start, UChar32 end)
 Removes the given range of characters from the given USet.
U_STABLE void U_EXPORT2 uset_removeString (USet *set, const UChar *str, int32_t strLen)
 Removes the given string from the given USet.
U_DRAFT void U_EXPORT2 uset_removeAll (USet *set, const USet *removeSet)
 Removes from this set all of its elements that are contained in the specified set.
U_DRAFT void U_EXPORT2 uset_retain (USet *set, UChar32 start, UChar32 end)
 Retain only the elements in this set that are contained in the specified range.
U_DRAFT void U_EXPORT2 uset_retainAll (USet *set, const USet *retain)
 Retains only the elements in this set that are contained in the specified set.
U_DRAFT void U_EXPORT2 uset_compact (USet *set)
 Reallocate this object's internal structures to take up the least possible space, without changing this object's value.
U_STABLE void U_EXPORT2 uset_complement (USet *set)
 Inverts this set.
U_DRAFT void U_EXPORT2 uset_complementAll (USet *set, const USet *complement)
 Complements in this set all elements contained in the specified set.
U_STABLE void U_EXPORT2 uset_clear (USet *set)
 Removes all of the elements from this set.
U_STABLE UBool U_EXPORT2 uset_isEmpty (const USet *set)
 Returns TRUE if the given USet contains no characters and no strings.
U_STABLE UBool U_EXPORT2 uset_contains (const USet *set, UChar32 c)
 Returns TRUE if the given USet contains the given character.
U_STABLE UBool U_EXPORT2 uset_containsRange (const USet *set, UChar32 start, UChar32 end)
 Returns TRUE if the given USet contains all characters c where start <= c && c <= end.
U_STABLE UBool U_EXPORT2 uset_containsString (const USet *set, const UChar *str, int32_t strLen)
 Returns TRUE if the given USet contains the given string.
U_DRAFT int32_t U_EXPORT2 uset_indexOf (const USet *set, UChar32 c)
 Returns the index of the given character within this set, where the set is ordered by ascending code point.
U_DRAFT UChar32 U_EXPORT2 uset_charAt (const USet *set, int32_t index)
 Returns the character at the given index within this set, where the set is ordered by ascending code point.
U_STABLE int32_t U_EXPORT2 uset_size (const USet *set)
 Returns the number of characters and strings contained in the given USet.
U_STABLE int32_t U_EXPORT2 uset_getItemCount (const USet *set)
 Returns the number of items in this set.
U_STABLE int32_t U_EXPORT2 uset_getItem (const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
 Returns an item of this set.
U_DRAFT UBool U_EXPORT2 uset_containsAll (const USet *set1, const USet *set2)
 Returns true if set1 contains all the characters and strings of set2.
U_DRAFT UBool U_EXPORT2 uset_containsAllCodePoints (const USet *set, const UChar *str, int32_t strLen)
 Returns true if this set contains all the characters of the given string.
U_DRAFT UBool U_EXPORT2 uset_containsNone (const USet *set1, const USet *set2)
 Returns true if set1 contains none of the characters and strings of set2.
U_DRAFT UBool U_EXPORT2 uset_containsSome (const USet *set1, const USet *set2)
 Returns true if set1 contains some of the characters and strings of set2.
U_DRAFT UBool U_EXPORT2 uset_equals (const USet *set1, const USet *set2)
 Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_STABLE int32_t U_EXPORT2 uset_serialize (const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
 Serializes this set into an array of 16-bit integers.
U_STABLE UBool U_EXPORT2 uset_getSerializedSet (USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
 Given a serialized array, fill in the given serialized set object.
U_STABLE void U_EXPORT2 uset_setSerializedToOne (USerializedSet *fillSet, UChar32 c)
 Set the USerializedSet to contain the given character (and nothing else).
U_STABLE UBool U_EXPORT2 uset_serializedContains (const USerializedSet *set, UChar32 c)
 Returns TRUE if the given USerializedSet contains the given character.
U_STABLE int32_t U_EXPORT2 uset_getSerializedRangeCount (const USerializedSet *set)
 Returns the number of disjoint ranges of characters contained in the given serialized set.
U_STABLE UBool U_EXPORT2 uset_getSerializedRange (const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
 Returns a range of characters contained in the given serialized set.


Detailed Description

C API: Unicode Set.

This is a C wrapper around the C++ UnicodeSet class.

Definition in file uset.h.


Typedef Documentation

typedef struct USerializedSet USerializedSet
 

A serialized form of a Unicode set.

Limited manipulations are possible directly on a serialized set. See below.

Stable:
ICU 2.4

typedef struct USet USet
 

A UnicodeSet.

Use the uset_* API to manipulate. Create with uset_open*, and destroy with uset_close.

Stable:
ICU 2.4

Definition at line 40 of file uset.h.


Enumeration Type Documentation

anonymous enum
 

Bitmask values to be passed to uset_openPatternOptions() or uset_applyPattern() taking an option parameter.

Stable:
ICU 2.4
Enumerator:
USET_IGNORE_SPACE  Ignore white space within patterns unless quoted or escaped.

Stable:
ICU 2.4
USET_CASE_INSENSITIVE  Enable case insensitive matching.

E.g., "[ab]" with this flag will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will match all except 'a', 'A', 'b', and 'B'. This performs a full closure over case mappings, e.g. U+017F for s.

The resulting set is a superset of the input for the code points but not for the strings. It performs a case mapping closure of the code points and adds full case folding strings for the code points, and reduces strings of the original set to their full case folding equivalents.

This is designed for case-insensitive matches, for example in regular expressions. The full code point case closure allows checking of an input character directly against the closure set. Strings are matched by comparing the case-folded form from the closure set with an incremental case folding of the string in question.

The closure set will also contain single code points if the original set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). This is not necessary (that is, redundant) for the above matching method but results in the same closure sets regardless of whether the original set contained the code point or a string.

Stable:
ICU 2.4
USET_CASE  Bitmask for UnicodeSet::closeOver() indicating letter case.

This may be ORed together with other selectors.

Internal:
Do not use. This API is for internal use only.
USET_ADD_CASE_MAPPINGS  Enable case insensitive matching.

E.g., "[ab]" with this flag will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will match all except 'a', 'A', 'b', and 'B'. This adds the lower-, title-, and uppercase mappings as well as the case folding of each existing element in the set.

Draft:
This API may be changed in the future versions and was introduced in ICU 3.2
USET_SERIALIZED_STATIC_ARRAY_CAPACITY  Enough for any single-code point set.

Internal:
Do not use. This API is for internal use only.

Definition at line 48 of file uset.h.


Function Documentation

U_STABLE void U_EXPORT2 uset_add (USet *set, UChar32 c)
 

Adds the given character to the given USet.

After this call, uset_contains(set, c) will return TRUE.

Parameters:
set the object to which to add the character
c the character to add
Stable:
ICU 2.4

U_STABLE void U_EXPORT2 uset_addAll (USet *set, const USet *additionalSet)
 

Adds all of the elements in the specified set to this set if they're not already present.

This operation effectively modifies this set so that its value is the union of the two sets. The behavior of this operation is unspecified if the specified collection is modified while the operation is in progress.

Parameters:
set the object to which to add the set
additionalSet the source set whose elements are to be added to this set.
Stable:
ICU 2.6

U_DRAFT void U_EXPORT2 uset_addAllCodePoints (USet *set, const UChar *str, int32_t strLen)
 

Adds each of the characters in this string to the set.

Thus "ch" => {"c", "h"}. If this set already contains any particular character, it has no effect on that character.

Parameters:
set the object to which to add the character
str the source string
strLen the length of the string or -1 if null terminated.
Draft:
This API may be changed in the future versions and was introduced in ICU 3.4

U_STABLE void U_EXPORT2 uset_addRange (USet *set, UChar32 start, UChar32 end)
 

Adds the given range of characters to the given USet.

After this call, uset_containsRange(set, start, end) will return TRUE.

Parameters:
set the object to which to add the range
start the first character of the range to add, inclusive
end the last character of the range to add, inclusive
Stable:
ICU 2.2

U_STABLE void U_EXPORT2 uset_addString (USet *set, const UChar *str, int32_t strLen)
 

Adds the given string to the given USet.

After this call, uset_containsString(set, str, strLen) will return TRUE.

Parameters:
set the object to which to add the string
str the string to add
strLen the length of the string or -1 if null terminated.
Stable:
ICU 2.4

U_DRAFT void U_EXPORT2 uset_applyIntPropertyValue (USet *set, UProperty prop, int32_t value, UErrorCode *ec)
 

Modifies the set to contain those code points which have the given value for the given binary or enumerated property, as returned by u_getIntPropertyValue.

Prior contents of this set are lost.

Parameters:
set the object to contain the code points defined by the property
prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 or UCHAR_INT_START..UCHAR_INT_LIMIT-1 or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
value a value in the range u_getIntPropertyMinValue(prop).. u_getIntPropertyMaxValue(prop), with one exception. If prop is UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but rather a mask value produced by U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented.
ec error code input/output parameter
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE int32_t U_EXPORT2 uset_applyPattern (USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
 

Modifies the set to represent the set specified by the given pattern.

See the UnicodeSet class description for the syntax of the pattern language. See also the User Guide chapter about UnicodeSet. Empties the set passed before applying the pattern.

Parameters:
set The set to which the pattern is to be applied.
pattern A pointer to UChar string specifying what characters are in the set. The character at pattern[0] must be a '['.
patternLength The length of the UChar string. -1 if NUL terminated.
options A bitmask for options to apply to the pattern. Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
status Returns an error if the pattern cannot be parsed.
Returns:
Upon successful parse, the value is the index of the character after the closing ']' of the parsed pattern. If the status code indicates failure, then the return value is the index of the error in the source.
Stable:
ICU 2.8

U_DRAFT void U_EXPORT2 uset_applyPropertyAlias (USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
 

Modifies the set to contain those code points which have the given value for the given property.

Prior contents of this set are lost.

Parameters:
set the object to contain the code points defined by the given property and value alias
prop a string specifying a property alias, either short or long. The name is matched loosely. See PropertyAliases.txt for names and a description of loose matching. If the value string is empty, then this string is interpreted as either a General_Category value alias, a Script value alias, a binary property alias, or a special ID. Special IDs are matched loosely and correspond to the following sets:
"ANY" = [\u0000-\U0010FFFF], "ASCII" = [\u0000-\u007F], "Assigned" = [:^Cn:].

Parameters:
propLength the length of the prop, or -1 if NUL-terminated
value a string specifying a value alias, either short or long. The name is matched loosely. See PropertyValueAliases.txt for names and a description of loose matching. In addition to aliases listed, numeric values and canonical combining classes may be expressed numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string may also be empty.
valueLength the length of the value, or -1 if NUL-terminated
ec error code input/output parameter
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_DRAFT UChar32 U_EXPORT2 uset_charAt (const USet *set, int32_t index)
 

Returns the character at the given index within this set, where the set is ordered by ascending code point.

If the index is out of range, return (UChar32)-1. The inverse of this function is uset_indexOf().

Parameters:
set the set
index an index from 0..size()-1 to obtain the char for
Returns:
the character at the given index, or (UChar32)-1.
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE void U_EXPORT2 uset_clear (USet *set)
 

Removes all of the elements from this set.

This set will be empty after this call returns.

Parameters:
set the set
Stable:
ICU 2.4

U_STABLE void U_EXPORT2 uset_close (USet *set)
 

Disposes of the storage used by a USet object.

This function should be called exactly once for objects returned by uset_open().

Parameters:
set the object to dispose of
Stable:
ICU 2.4

U_DRAFT void U_EXPORT2 uset_compact (USet *set)
 

Reallocate this object's internal structures to take up the least possible space, without changing this object's value.

Parameters:
set the object on which to perform the compact
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE void U_EXPORT2 uset_complement (USet *set)
 

Inverts this set.

This operation modifies this set so that its value is its complement. This operation does not affect the multicharacter strings, if any.

Parameters:
set the set
Stable:
ICU 2.4

U_DRAFT void U_EXPORT2 uset_complementAll (USet *set, const USet *complement)
 

Complements in this set all elements contained in the specified set.

Any character in the other set will be removed if it is in this set, or will be added if it is not in this set.

Parameters:
set the set with which to complement
complement set that defines which elements will be xor'ed from this set.
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE UBool U_EXPORT2 uset_contains (const USet *set, UChar32 c)
 

Returns TRUE if the given USet contains the given character.

Parameters:
set the set
c The codepoint to check for within the set
Returns:
true if set contains c
Stable:
ICU 2.4

U_DRAFT UBool U_EXPORT2 uset_containsAll (const USet *set1, const USet *set2)
 

Returns true if set1 contains all the characters and strings of set2.

It answers the question, 'Is set1 a superset of set2?'

Parameters:
set1 set to be checked for containment
set2 set to be checked for containment
Returns:
true if the test condition is met
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_DRAFT UBool U_EXPORT2 uset_containsAllCodePoints (const USet *set, const UChar *str, int32_t strLen)
 

Returns true if this set contains all the characters of the given string.

This does not check containment of grapheme clusters, like uset_containsString.

Parameters:
set set of characters to be checked for containment
str string containing codepoints to be checked for containment
strLen the length of the string or -1 if null terminated.
Returns:
true if the test condition is met
Draft:
This API may be changed in the future versions and was introduced in ICU 3.4

U_DRAFT UBool U_EXPORT2 uset_containsNone (const USet *set1, const USet *set2)
 

Returns true if set1 contains none of the characters and strings of set2.

It answers the question, 'Are set1 and set2 disjoint?'

Parameters:
set1 set to be checked for containment
set2 set to be checked for containment
Returns:
true if the test condition is met
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE UBool U_EXPORT2 uset_containsRange (const USet *set, UChar32 start, UChar32 end)
 

Returns TRUE if the given USet contains all characters c where start <= c && c <= end.

Parameters:
set the set
start the first character of the range to test, inclusive
end the last character of the range to test, inclusive
Returns:
TRUE if set contains the range
Stable:
ICU 2.2

U_DRAFT UBool U_EXPORT2 uset_containsSome (const USet *set1, const USet *set2)
 

Returns true if set1 contains some of the characters and strings of set2.

It answers the question, 'Do set1 and set2 have an intersection?'

Parameters:
set1 set to be checked for containment
set2 set to be checked for containment
Returns:
true if the test condition is met
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE UBool U_EXPORT2 uset_containsString (const USet *set, const UChar *str, int32_t strLen)
 

Returns TRUE if the given USet contains the given string.

Parameters:
set the set
str the string
strLen the length of the string or -1 if null terminated.
Returns:
true if set contains str
Stable:
ICU 2.4

U_DRAFT UBool U_EXPORT2 uset_equals (const USet *set1, const USet *set2)
 

Returns true if set1 contains all of the characters and strings of set2, and vice versa.

It answers the question, 'Is set1 equal to set2?'

Parameters:
set1 set to be checked for containment
set2 set to be checked for containment
Returns:
true if the test condition is met
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE int32_t U_EXPORT2 uset_getItem (const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
 

Returns an item of this set.

An item is either a range of characters or a single multicharacter string.

Parameters:
set the set
itemIndex a non-negative integer in the range 0.. uset_getItemCount(set)-1
start pointer to variable to receive first character in range, inclusive
end pointer to variable to receive last character in range, inclusive
str buffer to receive the string, may be NULL
strCapacity capacity of str, or 0 if str is NULL
ec error code
Returns:
the length of the string (>= 2), or 0 if the item is a range, in which case it is the range *start..*end, or -1 if itemIndex is out of range
Stable:
ICU 2.4

U_STABLE int32_t U_EXPORT2 uset_getItemCount (const USet *set)
 

Returns the number of items in this set.

An item is either a range of characters or a single multicharacter string.

Parameters:
set the set
Returns:
a non-negative integer counting the character ranges and/or strings contained in set
Stable:
ICU 2.4

U_STABLE UBool U_EXPORT2 uset_getSerializedRange (const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
 

Returns a range of characters contained in the given serialized set.

Parameters:
set the serialized set
rangeIndex a non-negative integer in the range 0.. uset_getSerializedRangeCount(set)-1
pStart pointer to variable to receive first character in range, inclusive
pEnd pointer to variable to receive last character in range, inclusive
Returns:
true if rangeIndex is valid, otherwise false
Stable:
ICU 2.4

U_STABLE int32_t U_EXPORT2 uset_getSerializedRangeCount (const USerializedSet *set)
 

Returns the number of disjoint ranges of characters contained in the given serialized set.

Ignores any strings contained in the set.

Parameters:
set the serialized set
Returns:
a non-negative integer counting the character ranges contained in set
Stable:
ICU 2.4

U_STABLE UBool U_EXPORT2 uset_getSerializedSet (USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
 

Given a serialized array, fill in the given serialized set object.

Parameters:
fillSet pointer to result
src pointer to start of array
srcLength length of array
Returns:
true if the given array is valid, otherwise false
Stable:
ICU 2.4

U_DRAFT int32_t U_EXPORT2 uset_indexOf (const USet *set, UChar32 c)
 

Returns the index of the given character within this set, where the set is ordered by ascending code point.

If the character is not in this set, return -1. The inverse of this function is uset_charAt().

Parameters:
set the set
c the character to obtain the index for
Returns:
an index from 0..size()-1, or -1
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE UBool U_EXPORT2 uset_isEmpty (const USet *set)
 

Returns TRUE if the given USet contains no characters and no strings.

Parameters:
set the set
Returns:
true if set is empty
Stable:
ICU 2.4

U_STABLE USet *U_EXPORT2 uset_open (UChar32 start, UChar32 end)
 

Creates a USet object that contains the range of characters start..end, inclusive.

Parameters:
start first character of the range, inclusive
end last character of the range, inclusive
Returns:
a newly created USet. The caller must call uset_close() on it when done.
Stable:
ICU 2.4

U_STABLE USet *U_EXPORT2 uset_openPattern (const UChar *pattern, int32_t patternLength, UErrorCode *ec)
 

Creates a set from the given pattern.

See the UnicodeSet class description for the syntax of the pattern language.

Parameters:
pattern a string specifying what characters are in the set
patternLength the length of the pattern, or -1 if null terminated
ec the error code
Stable:
ICU 2.4

U_STABLE USet *U_EXPORT2 uset_openPatternOptions (const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
 

Creates a set from the given pattern.

See the UnicodeSet class description for the syntax of the pattern language.

Parameters:
pattern a string specifying what characters are in the set
patternLength the length of the pattern, or -1 if null terminated
options bitmask for options to apply to the pattern. Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
ec the error code
Stable:
ICU 2.4

U_STABLE void U_EXPORT2 uset_remove (USet *set, UChar32 c)
 

Removes the given character from the given USet.

After this call, uset_contains(set, c) will return FALSE.

Parameters:
set the object from which to remove the character
c the character to remove
Stable:
ICU 2.4

U_DRAFT void U_EXPORT2 uset_removeAll (USet *set, const USet *removeSet)
 

Removes from this set all of its elements that are contained in the specified set.

This operation effectively modifies this set so that its value is the asymmetric set difference of the two sets.

Parameters:
set the object from which the elements are to be removed
removeSet the object that defines which elements will be removed from this set
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE void U_EXPORT2 uset_removeRange (USet *set, UChar32 start, UChar32 end)
 

Removes the given range of characters from the given USet.

After this call, uset_containsRange(set, start, end) will return FALSE.

Parameters:
set the object from which to remove the range
start the first character of the range to remove, inclusive
end the last character of the range to remove, inclusive
Stable:
ICU 2.2

U_STABLE void U_EXPORT2 uset_removeString (USet *set, const UChar *str, int32_t strLen)
 

Removes the given string from the given USet.

After this call, uset_containsString(set, str, strLen) will return FALSE.

Parameters:
set the object from which to remove the string
str the string to remove
strLen the length of the string or -1 if null terminated.
Stable:
ICU 2.4

U_DRAFT UBool U_EXPORT2 uset_resemblesPattern (const UChar *pattern, int32_t patternLength, int32_t pos)
 

Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.

Parameters:
pattern a string specifying the pattern
patternLength the length of the pattern, or -1 if NUL-terminated
pos the given position
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_DRAFT void U_EXPORT2 uset_retain (USet *set, UChar32 start, UChar32 end)
 

Retain only the elements in this set that are contained in the specified range.

If start > end then an empty range is retained, leaving the set empty. This is equivalent to a boolean logic AND, or a set INTERSECTION.

Parameters:
set the object for which to retain only the specified range
start first character, inclusive, of range to be retained to this set.
end last character, inclusive, of range to be retained to this set.
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_DRAFT void U_EXPORT2 uset_retainAll (USet *set, const USet *retain)
 

Retains only the elements in this set that are contained in the specified set.

In other words, removes from this set all of its elements that are not contained in the specified set. This operation effectively modifies this set so that its value is the intersection of the two sets.

Parameters:
set the object on which to perform the retain
retain set that defines which elements this set will retain
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE int32_t U_EXPORT2 uset_serialize (const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
 

Serializes this set into an array of 16-bit integers.

Serialization (currently) only records the characters in the set; multicharacter strings are ignored.

The array has following format (each line is one 16-bit integer):

length = (n+2*m) | (m!=0?0x8000:0)
bmpLength = n; present if m!=0
bmp[0]
bmp[1]
...
bmp[n-1]
supp-high[0]
supp-low[0]
supp-high[1]
supp-low[1]
...
supp-high[m-1]
supp-low[m-1]

The array starts with a header. After the header are n bmp code points, then m supplementary code points. Either n or m or both may be zero. n+2*m is always <= 0x7FFF.

If there are no supplementary characters (if m==0) then the header is one 16-bit integer, 'length', with value n.

If there are supplementary characters (if m!=0) then the header is two 16-bit integers. The first, 'length', has value (n+2*m)|0x8000. The second, 'bmpLength', has value n.

After the header the code points are stored in ascending order. Supplementary code points are stored as most significant 16 bits followed by least significant 16 bits.

Parameters:
set the set
dest pointer to buffer of destCapacity 16-bit integers. May be NULL only if destCapacity is zero.
destCapacity size of dest, or zero. Must not be negative.
pErrorCode pointer to the error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
Returns:
the total length of the serialized format, including the header, that is, n+2*m+(m!=0?2:1), or 0 on error other than U_BUFFER_OVERFLOW_ERROR.
Stable:
ICU 2.4

U_STABLE UBool U_EXPORT2 uset_serializedContains (const USerializedSet *set, UChar32 c)
 

Returns TRUE if the given USerializedSet contains the given character.

Parameters:
set the serialized set
c The codepoint to check for within the set
Returns:
true if set contains c
Stable:
ICU 2.4

U_DRAFT void U_EXPORT2 uset_set (USet *set, UChar32 start, UChar32 end)
 

Causes the USet object to represent the range start - end.

If start > end then this USet is set to an empty range.

Parameters:
set the object to set to the given range
start first character in the set, inclusive
end last character in the set, inclusive
Draft:
This API may be changed in the future versions and was introduced in ICU 3.2

U_STABLE void U_EXPORT2 uset_setSerializedToOne (USerializedSet *fillSet, UChar32 c)
 

Set the USerializedSet to contain the given character (and nothing else).

Parameters:
fillSet pointer to result
c The codepoint to set
Stable:
ICU 2.4

U_STABLE int32_t U_EXPORT2 uset_size (const USet *set)
 

Returns the number of characters and strings contained in the given USet.

Parameters:
set the set
Returns:
a non-negative integer counting the characters and strings contained in set
Stable:
ICU 2.4

U_STABLE int32_t U_EXPORT2 uset_toPattern (const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
 

Returns a string representation of this set.

If the result of calling this function is passed to a uset_openPattern(), it will produce another set that is equal to this one.

Parameters:
set the set
result the string to receive the rules, may be NULL
resultCapacity the capacity of result, may be 0 if result is NULL
escapeUnprintable if TRUE then convert unprintable characters to their hex escape representations, \uxxxx or \Uxxxxxxxx. Unprintable characters are those other than U+000A, U+0020..U+007E.
ec error code.
Returns:
length of string, possibly larger than resultCapacity
Stable:
ICU 2.4


Generated on Tue Sep 13 11:08:19 2005 for ICU 3.4 by  doxygen 1.4.4