ICU 62.1
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 #include "unicode/localpointer.h"
35 
36 #ifndef UCNV_H
37 struct USet;
43 typedef struct USet USet;
44 #endif
45 
51 enum {
57 
85 
95 };
96 
152 typedef enum USetSpanCondition {
201 #ifndef U_HIDE_DEPRECATED_API
202 
207 #endif // U_HIDE_DEPRECATED_API
209 
210 enum {
218 };
219 
225 typedef struct USerializedSet {
230  const uint16_t *array;
235  int32_t bmpLength;
240  int32_t length;
245  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
246 } USerializedSet;
247 
248 /*********************************************************************
249  * USet API
250  *********************************************************************/
251 
259 U_STABLE USet* U_EXPORT2
260 uset_openEmpty(void);
261 
272 U_STABLE USet* U_EXPORT2
273 uset_open(UChar32 start, UChar32 end);
274 
284 U_STABLE USet* U_EXPORT2
285 uset_openPattern(const UChar* pattern, int32_t patternLength,
286  UErrorCode* ec);
287 
299 U_STABLE USet* U_EXPORT2
300 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
301  uint32_t options,
302  UErrorCode* ec);
303 
310 U_STABLE void U_EXPORT2
311 uset_close(USet* set);
312 
313 #if U_SHOW_CPLUSPLUS_API
314 
316 
327 
329 
330 #endif
331 
341 U_STABLE USet * U_EXPORT2
342 uset_clone(const USet *set);
343 
353 U_STABLE UBool U_EXPORT2
354 uset_isFrozen(const USet *set);
355 
370 U_STABLE void U_EXPORT2
371 uset_freeze(USet *set);
372 
383 U_STABLE USet * U_EXPORT2
384 uset_cloneAsThawed(const USet *set);
385 
395 U_STABLE void U_EXPORT2
396 uset_set(USet* set,
397  UChar32 start, UChar32 end);
398 
420 U_STABLE int32_t U_EXPORT2
421 uset_applyPattern(USet *set,
422  const UChar *pattern, int32_t patternLength,
423  uint32_t options,
424  UErrorCode *status);
425 
448 U_STABLE void U_EXPORT2
449 uset_applyIntPropertyValue(USet* set,
450  UProperty prop, int32_t value, UErrorCode* ec);
451 
487 U_STABLE void U_EXPORT2
488 uset_applyPropertyAlias(USet* set,
489  const UChar *prop, int32_t propLength,
490  const UChar *value, int32_t valueLength,
491  UErrorCode* ec);
492 
502 U_STABLE UBool U_EXPORT2
503 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
504  int32_t pos);
505 
521 U_STABLE int32_t U_EXPORT2
522 uset_toPattern(const USet* set,
523  UChar* result, int32_t resultCapacity,
524  UBool escapeUnprintable,
525  UErrorCode* ec);
526 
535 U_STABLE void U_EXPORT2
536 uset_add(USet* set, UChar32 c);
537 
550 U_STABLE void U_EXPORT2
551 uset_addAll(USet* set, const USet *additionalSet);
552 
562 U_STABLE void U_EXPORT2
563 uset_addRange(USet* set, UChar32 start, UChar32 end);
564 
574 U_STABLE void U_EXPORT2
575 uset_addString(USet* set, const UChar* str, int32_t strLen);
576 
586 U_STABLE void U_EXPORT2
587 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
588 
597 U_STABLE void U_EXPORT2
598 uset_remove(USet* set, UChar32 c);
599 
609 U_STABLE void U_EXPORT2
610 uset_removeRange(USet* set, UChar32 start, UChar32 end);
611 
621 U_STABLE void U_EXPORT2
622 uset_removeString(USet* set, const UChar* str, int32_t strLen);
623 
635 U_STABLE void U_EXPORT2
636 uset_removeAll(USet* set, const USet* removeSet);
637 
652 U_STABLE void U_EXPORT2
653 uset_retain(USet* set, UChar32 start, UChar32 end);
654 
667 U_STABLE void U_EXPORT2
668 uset_retainAll(USet* set, const USet* retain);
669 
678 U_STABLE void U_EXPORT2
679 uset_compact(USet* set);
680 
689 U_STABLE void U_EXPORT2
690 uset_complement(USet* set);
691 
703 U_STABLE void U_EXPORT2
704 uset_complementAll(USet* set, const USet* complement);
705 
713 U_STABLE void U_EXPORT2
714 uset_clear(USet* set);
715 
742 U_STABLE void U_EXPORT2
743 uset_closeOver(USet* set, int32_t attributes);
744 
751 U_STABLE void U_EXPORT2
752 uset_removeAllStrings(USet* set);
753 
761 U_STABLE UBool U_EXPORT2
762 uset_isEmpty(const USet* set);
763 
772 U_STABLE UBool U_EXPORT2
773 uset_contains(const USet* set, UChar32 c);
774 
784 U_STABLE UBool U_EXPORT2
785 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
786 
795 U_STABLE UBool U_EXPORT2
796 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
797 
808 U_STABLE int32_t U_EXPORT2
809 uset_indexOf(const USet* set, UChar32 c);
810 
821 U_STABLE UChar32 U_EXPORT2
822 uset_charAt(const USet* set, int32_t charIndex);
823 
832 U_STABLE int32_t U_EXPORT2
833 uset_size(const USet* set);
834 
843 U_STABLE int32_t U_EXPORT2
844 uset_getItemCount(const USet* set);
845 
864 U_STABLE int32_t U_EXPORT2
865 uset_getItem(const USet* set, int32_t itemIndex,
866  UChar32* start, UChar32* end,
867  UChar* str, int32_t strCapacity,
868  UErrorCode* ec);
869 
878 U_STABLE UBool U_EXPORT2
879 uset_containsAll(const USet* set1, const USet* set2);
880 
891 U_STABLE UBool U_EXPORT2
892 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
893 
902 U_STABLE UBool U_EXPORT2
903 uset_containsNone(const USet* set1, const USet* set2);
904 
913 U_STABLE UBool U_EXPORT2
914 uset_containsSome(const USet* set1, const USet* set2);
915 
935 U_STABLE int32_t U_EXPORT2
936 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
937 
956 U_STABLE int32_t U_EXPORT2
957 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
958 
978 U_STABLE int32_t U_EXPORT2
979 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
980 
999 U_STABLE int32_t U_EXPORT2
1000 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1001 
1010 U_STABLE UBool U_EXPORT2
1011 uset_equals(const USet* set1, const USet* set2);
1012 
1013 /*********************************************************************
1014  * Serialized set API
1015  *********************************************************************/
1016 
1066 U_STABLE int32_t U_EXPORT2
1067 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1068 
1077 U_STABLE UBool U_EXPORT2
1078 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1079 
1087 U_STABLE void U_EXPORT2
1088 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1089 
1098 U_STABLE UBool U_EXPORT2
1099 uset_serializedContains(const USerializedSet* set, UChar32 c);
1100 
1110 U_STABLE int32_t U_EXPORT2
1111 uset_getSerializedRangeCount(const USerializedSet* set);
1112 
1126 U_STABLE UBool U_EXPORT2
1127 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1128  UChar32* pStart, UChar32* pEnd);
1129 
1130 #endif
uset_addAll
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
uset_getSerializedRangeCount
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
USerializedSet::staticArray
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:245
uset_serialize
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
USerializedSet::array
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:230
uset_complementAll
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
uset_serializedContains
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
USET_SPAN_NOT_CONTAINED
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition: uset.h:165
uset_indexOf
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
uset_toPattern
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
uset_containsRange
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end.
uset_openEmpty
USet * uset_openEmpty(void)
Create an empty USet object.
uset_spanBack
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
USerializedSet
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
uset_closeOver
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
uset_containsAllCodePoints
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
UProperty
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:165
uset_close
void uset_close(USet *set)
Disposes of the storage used by a USet object.
USetSpanCondition
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:152
uset_getItemCount
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
uset_spanBackUTF8
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
uset_getSerializedRange
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
USET_SPAN_CONTAINED
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition: uset.h:180
uset_openPatternOptions
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
uset_equals
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
uset_clone
USet * uset_clone(const USet *set)
Returns a copy of this object.
uset_applyPattern
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
USET_ADD_CASE_MAPPINGS
@ USET_ADD_CASE_MAPPINGS
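Enable case insensitive matching by adding the lower-, title-, and uppercase mappings, as well as the case folding, of each existing element in the set.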
Definition: uset.h:94
uset_retain
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:400
uset_containsSome
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
uset_isFrozen
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
uset_addRange
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:396
uset_resemblesPattern
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
uset_charAt
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
uset_clear
void uset_clear(USet *set)
Removes all of the elements from this set.
USerializedSet::length
int32_t length
The total length of the array.
Definition: uset.h:240
uset_retainAll
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
USET_SERIALIZED_STATIC_ARRAY_CAPACITY
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition: uset.h:217
uset_isEmpty
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
uset_applyIntPropertyValue
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
uset_setSerializedToOne
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
uset_removeString
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
uset_cloneAsThawed
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
uset_addAllCodePoints
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
uset_compact
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
U_DEFINE_LOCAL_OPEN_POINTER
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
Definition: localpointer.h:487
uset_containsAll
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
uset_containsNone
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
uset_getSerializedSet
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
LocalUSetPointer
uset_applyPropertyAlias
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
uset_spanUTF8
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
uset_openPattern
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
uset_removeRange
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
USET_SPAN_SIMPLE
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition: uset.h:200
uset_removeAll
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
uset_containsString
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
USerializedSet
A serialized form of a Unicode set.
Definition: uset.h:225
uset_span
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_STABLE
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
uchar.h
C API: Unicode Properties.
uset_size
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
uset_getItem
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
UChar
uint16_t UChar
Definition: umachine.h:353
USet
struct USet USet
Definition: ucnv.h:69
USET_IGNORE_SPACE
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:56
uset_add
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_NAMESPACE_END
#define U_NAMESPACE_END
Definition: uversion.h:138
uset_open
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
uset_set
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
Definition: uversion.h:137
uset_contains
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
uset_remove
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
uset_addString
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
USerializedSet::bmpLength
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:235
USET_SPAN_CONDITION_COUNT
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition: uset.h:206
uset_removeAllStrings
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
USET_CASE_INSENSITIVE
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uset.h:84
uset_complement
void uset_complement(USet *set)
Inverts this set.
uset_freeze
void uset_freeze(USet *set)
Freeze the set (make it immutable).