ICU 62.1  62.1
rbbi.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 1999-2016 International Business Machines Corporation *
6 * and others. All rights reserved. *
7 ***************************************************************************
8 
9 **********************************************************************
10 * Date Name Description
11 * 10/22/99 alan Creation.
12 * 11/11/99 rgillam Complete port from Java.
13 **********************************************************************
14 */
15 
16 #ifndef RBBI_H
17 #define RBBI_H
18 
19 #include "unicode/utypes.h"
20 
26 #if !UCONFIG_NO_BREAK_ITERATION
27 
28 #include "unicode/brkiter.h"
29 #include "unicode/udata.h"
30 #include "unicode/parseerr.h"
31 #include "unicode/schriter.h"
32 
34 
36 class LanguageBreakEngine;
37 struct RBBIDataHeader;
38 class RBBIDataWrapper;
39 class UnhandledEngine;
40 class UStack;
41 
54 
55 private:
60  UText fText;
61 
62 #ifndef U_HIDE_INTERNAL_API
63 public:
64 #endif /* U_HIDE_INTERNAL_API */
65 
70  RBBIDataWrapper *fData;
71 private:
72 
77  int32_t fPosition;
78 
82  int32_t fRuleStatusIndex;
83 
87  class BreakCache;
88  BreakCache *fBreakCache;
89 
94  class DictionaryCache;
95  DictionaryCache *fDictionaryCache;
96 
104  UStack *fLanguageBreakEngines;
105 
113  UnhandledEngine *fUnhandledBreakEngine;
114 
120  uint32_t fDictionaryCharCount;
121 
127  CharacterIterator *fCharIter;
128 
134  StringCharacterIterator fSCharIter;
135 
139  UBool fDone;
140 
141  //=======================================================================
142  // constructors
143  //=======================================================================
144 
155  RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
156 
158  friend class RBBIRuleBuilder;
160  friend class BreakIterator;
161 
162 public:
163 
169 
177 
187  UParseError &parseError,
188  UErrorCode &status);
189 
213  RuleBasedBreakIterator(const uint8_t *compiledRules,
214  uint32_t ruleLength,
215  UErrorCode &status);
216 
230 
235  virtual ~RuleBasedBreakIterator();
236 
245 
254  virtual UBool operator==(const BreakIterator& that) const;
255 
263  UBool operator!=(const BreakIterator& that) const;
264 
275  virtual BreakIterator* clone() const;
276 
282  virtual int32_t hashCode(void) const;
283 
289  virtual const UnicodeString& getRules(void) const;
290 
291  //=======================================================================
292  // BreakIterator overrides
293  //=======================================================================
294 
320  virtual CharacterIterator& getText(void) const;
321 
322 
337  virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
338 
346  virtual void adoptText(CharacterIterator* newText);
347 
359  virtual void setText(const UnicodeString& newText);
360 
374  virtual void setText(UText *text, UErrorCode &status);
375 
381  virtual int32_t first(void);
382 
388  virtual int32_t last(void);
389 
400  virtual int32_t next(int32_t n);
401 
407  virtual int32_t next(void);
408 
414  virtual int32_t previous(void);
415 
423  virtual int32_t following(int32_t offset);
424 
432  virtual int32_t preceding(int32_t offset);
433 
442  virtual UBool isBoundary(int32_t offset);
443 
452  virtual int32_t current(void) const;
453 
454 
486  virtual int32_t getRuleStatus() const;
487 
511  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
512 
524  virtual UClassID getDynamicClassID(void) const;
525 
537  static UClassID U_EXPORT2 getStaticClassID(void);
538 
565  virtual BreakIterator * createBufferClone(void *stackBuffer,
566  int32_t &BufferSize,
567  UErrorCode &status);
568 
569 
587  virtual const uint8_t *getBinaryRules(uint32_t &length);
588 
614  virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
615 
616 
617 private:
618  //=======================================================================
619  // implementation
620  //=======================================================================
626  void reset(void);
627 
632  void init(UErrorCode &status);
633 
643  int32_t handleSafePrevious(int32_t fromPosition);
644 
657  int32_t handleNext();
658 
659 
666  const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
667 
668  public:
669 #ifndef U_HIDE_INTERNAL_API
670 
674  void dumpCache();
675 
680  void dumpTables();
681 
682 #endif /* U_HIDE_INTERNAL_API */
683 };
684 
685 //------------------------------------------------------------------------------
686 //
687 // Inline Functions Definitions ...
688 //
689 //------------------------------------------------------------------------------
690 
692  return !operator==(that);
693 }
694 
696 
697 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
698 
699 #endif
icu::BreakIterator
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:102
parseerr.h
C API: Parse Error Information.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::BreakIterator::current
virtual int32_t current(void) const =0
Return character index of the current iterator position within the text.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
icu::RuleBasedBreakIterator::fData
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
Definition: rbbi.h:70
icu::BreakIterator::getRuleStatus
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
icu::operator==
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
U_COMMON_API
#define U_COMMON_API
Definition: utypes.h:359
icu::BreakIterator::operator=
BreakIterator & operator=(const BreakIterator &other)
icu::BreakIterator::createBufferClone
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
icu::BreakIterator::setText
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
icu::BreakIterator::isBoundary
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
brkiter.h
C++ API: Break Iterator.
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
icu::BreakIterator::getDynamicClassID
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
icu::BreakIterator::preceding
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:286
schriter.h
C++ API: String Character Iterator.
icu::StringCharacterIterator
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
Definition: schriter.h:45
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:400
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:93
icu::BreakIterator::refreshInputText
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:396
icu::RuleBasedBreakIterator
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition: rbbi.h:53
udata.h
C API: Data loading interface.
icu::BreakIterator::following
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
icu::operator!=
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
UText
UText struct.
Definition: utext.h:1345
icu::BreakIterator::adoptText
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
icu::BreakIterator::operator!=
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:131
UDataMemory
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
Definition: udata.h:158
icu::BreakIterator::last
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
icu::BreakIterator::getRuleStatusVec
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
icu::BreakIterator::clone
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
icu::BreakIterator::operator==
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
icu::BreakIterator::first
virtual int32_t first(void)=0
Sets the current iteration position to the beginning of the text, position zero.
icu::BreakIterator::getUText
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
icu::BreakIterator::next
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
icu::BreakIterator::previous
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
icu::BreakIterator::getText
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
U_NAMESPACE_END
#define U_NAMESPACE_END
Definition: uversion.h:138
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
Definition: uversion.h:137