Go to the documentation of this file.
22 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
23 # define UBRK_TYPEDEF_UBREAK_ITERATOR
31 #if !UCONFIG_NO_BREAK_ITERATION
109 #ifndef U_HIDE_DEPRECATED_API
124 #endif // U_HIDE_DEPRECATED_API
130 #define UBRK_DONE ((int32_t) -1)
293 const UChar * text, int32_t textLength,
318 int32_t *pBufferSize,
321 #ifndef U_HIDE_DEPRECATED_API
327 #define U_BRK_SAFECLONE_BUFFERSIZE 1
340 #if U_SHOW_CPLUSPLUS_API
623 uint8_t * binaryRules, int32_t rulesCapacity,
@ UBRK_SENTENCE_TERM
Tag value for sentences ending with a sentence terminator ('.', '?', '!', etc.) character, possibly followed by a hard separator (CR, LF, PS, etc.).
int32_t ubrk_getBinaryRules(UBreakIterator *bi, uint8_t *binaryRules, int32_t rulesCapacity, UErrorCode *status)
Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
C API: Parse Error Information.
int32_t ubrk_first(UBreakIterator *bi)
Set the iterator position to zero, the start of the text being scanned.
@ UBRK_LINE_SOFT_LIMIT
Upper bound for soft line breaks.
const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
Basic definitions for ICU, for both C and C++ APIs.
int32_t ubrk_previous(UBreakIterator *bi)
Set the iterator position to the boundary preceding the current boundary.
int8_t UBool
The ICU boolean type.
@ UBRK_WORD_LETTER
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters,...
int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position.
ULocDataLocaleType
Constants for *_getLocale(). Allows the user to select whether they want information on the requested,...
@ UBRK_SENTENCE_SEP
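Tag value for sentences that do not contain an ending sentence terminator ('.', '?', '!', etc.) character, but are ended only by a hard separator (CR, LF, PS, etc.) or end of input.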
void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
@ UBRK_WORD_NONE_LIMIT
Upper bound for tags for uncategorized words.
C API: Abstract Unicode Text API.
@ UBRK_WORD_NUMBER_LIMIT
Tag value for words that appear to be numbers, upper limit.
@ UBRK_TITLE
Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only.
UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
@ UBRK_WORD_IDEO_LIMIT
Tag value for words containing ideographic characters, upper limit.
@ UBRK_LINE_HARD_LIMIT
Upper bound for hard line breaks.
int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
int32_t ubrk_last(UBreakIterator *bi)
Set the iterator position to the index immediately beyond the last character in the text being scanned.
@ UBRK_WORD_KANA_LIMIT
Tag value for words containing kana characters, upper limit.
@ UBRK_COUNT
One more than the highest normal UBreakIteratorType value.
A UParseError struct is used to return detailed information about parsing errors.
void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Advance the iterator to the first boundary following the specified offset.
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
@ UBRK_WORD_KANA
Tag value for words containing kana characters, lower limit.
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
@ UBRK_WORD_NONE
Tag value for "words" that do not fit into any of the other categories.
UBreakIteratorType
The possible types of text boundaries.
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
@ UBRK_WORD_NUMBER
Tag value for words that appear to be numbers, lower limit.
@ UBRK_SENTENCE
Sentence breaks.
@ UBRK_LINE_SOFT
Tag value for soft line breaks, positions at which a line break is acceptable but not required.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Set the iterator position to the first boundary preceding the specified offset.
void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
@ UBRK_WORD_IDEO
Tag value for words containing ideographic characters, lower limit.
void ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Set the subject text string upon which the break iterator is operating without changing any other aspect of the state.
UBreakIterator * ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
@ UBRK_WORD_LETTER_LIMIT
Tag value for words containing letters, upper limit.
@ UBRK_SENTENCE_SEP_LIMIT
Upper bound for tags for sentences ended by a separator.
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
@ UBRK_CHARACTER
Character breaks.
UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
#define U_NAMESPACE_BEGIN
int32_t ubrk_next(UBreakIterator *bi)
Advance the iterator to the boundary following the current boundary.
int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position.
@ UBRK_LINE_HARD
Tag value for a hard, or mandatory, line break.
@ UBRK_SENTENCE_TERM_LIMIT
Upper bound for tags for sentences ended by sentence terminators.
int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.