public class StringWordIndexer extends java.lang.Object implements WordIndexer<java.lang.String>
WordIndexer.StaticMethods
Constructor and Description |
---|
StringWordIndexer() |
Modifier and Type | Method and Description |
---|---|
java.lang.String |
getEndSymbol()
Returns the end symbol (usually something like </s>).
|
int |
getIndexPossiblyUnk(java.lang.String word)
Should never add to vocabulary, and should return getUnkSymbol() if the
word is not in the vocabulary.
|
int |
getOrAddIndex(java.lang.String word)
Gets the index for a word, adding if necessary.
|
int |
getOrAddIndexFromString(java.lang.String word) |
java.lang.String |
getStartSymbol()
Returns the start symbol (usually something like <s>).
|
java.lang.String |
getUnkSymbol()
Returns the unk (unknown-word) symbol, usually something like <unk>.
|
java.lang.String |
getWord(int index)
Gets the word object for an index.
|
int |
numWords()
Returns the number of words that have been added so far.
|
void |
setEndSymbol(java.lang.String sym) |
void |
setStartSymbol(java.lang.String sym) |
void |
setUnkSymbol(java.lang.String sym) |
void |
trimAndLock()
Informs the implementation that no more words can be added to the
vocabulary.
|
public int getOrAddIndex(java.lang.String word)
WordIndexer
getOrAddIndex
in interface WordIndexer<java.lang.String>
public java.lang.String getWord(int index)
WordIndexer
getWord
in interface WordIndexer<java.lang.String>
public int numWords()
WordIndexer
numWords
in interface WordIndexer<java.lang.String>
public java.lang.String getStartSymbol()
WordIndexer
getStartSymbol
in interface WordIndexer<java.lang.String>
public java.lang.String getEndSymbol()
WordIndexer
getEndSymbol
in interface WordIndexer<java.lang.String>
public java.lang.String getUnkSymbol()
WordIndexer
getUnkSymbol
in interface WordIndexer<java.lang.String>
public int getOrAddIndexFromString(java.lang.String word)
getOrAddIndexFromString
in interface WordIndexer<java.lang.String>
public void setStartSymbol(java.lang.String sym)
setStartSymbol
in interface WordIndexer<java.lang.String>
public void setEndSymbol(java.lang.String sym)
setEndSymbol
in interface WordIndexer<java.lang.String>
public void setUnkSymbol(java.lang.String sym)
setUnkSymbol
in interface WordIndexer<java.lang.String>
public void trimAndLock()
WordIndexer
trimAndLock
in interface WordIndexer<java.lang.String>
public int getIndexPossiblyUnk(java.lang.String word)
WordIndexer
getIndexPossiblyUnk
in interface WordIndexer<java.lang.String>