T
- public final class HashNgramMap<T> extends AbstractNgramMap<T> implements ContextEncodedNgramMap<T>
NgramMap.Entry<T>
NUM_BITS_PER_BYTE, NUM_SUFFIX_BITS, NUM_WORD_BITS, opts, SUFFIX_BIT_MASK, values, WORD_BIT_MASK
Modifier and Type | Method and Description |
---|---|
void |
clearStorage() |
boolean |
contains(int[] ngram,
int startPos,
int endPos) |
static <T> HashNgramMap<T> |
createExplicitWordHashNgramMap(ValueContainer<T> values,
ConfigOptions opts,
int maxNgramOrder,
boolean reversed)
Note: an explicit HashNgramMap can grow beyond maxNgramOrder.
|
static <T> HashNgramMap<T> |
createImplicitWordHashNgramMap(ValueContainer<T> values,
ConfigOptions opts,
LongArray[] numNgramsForEachWord,
boolean reversed) |
T |
get(int[] ngram,
int startPos,
int endPos) |
int |
getFirstWordForOffset(long offset,
int ngramOrder) |
int |
getLastWordForOffset(long offset,
int ngramOrder) |
int |
getMaxNgramOrder() |
long |
getNextContextOffset(long offset,
int ngramOrder) |
int |
getNextWord(long offset,
int ngramOrder) |
int[] |
getNgramForOffset(long offset,
int ngramOrder) |
int[] |
getNgramForOffset(long offset,
int ngramOrder,
int[] ret) |
int[] |
getNgramFromContextEncoding(long contextOffset,
int contextOrder,
int word) |
java.lang.Iterable<java.lang.Long> |
getNgramOffsetsForOrder(int ngramOrder) |
java.lang.Iterable<NgramMap.Entry<T>> |
getNgramsForOrder(int ngramOrder) |
long |
getNumNgrams(int ngramOrder) |
long |
getOffset(long contextOffset,
int contextOrder,
int word) |
ContextEncodedNgramLanguageModel.LmContextInfo |
getOffsetForNgram(int[] ngram,
int startPos,
int endPos) |
long |
getOffsetForNgramInModel(int[] ngram,
int startPos,
int endPos)
Like
getOffsetForNgram(int[], int, int), but assumes that the
full n-gram is in the map (i.e. does not back off to the largest suffix
which is in the model). |
long |
getPrefixOffset(long offset,
int ngramOrder)
Gets the offset of the context for an n-gram (represented by offset)
|
long |
getTotalSize() |
long |
getValueAndOffset(long contextOffset,
int contextOrder,
int word,
T outputVal) |
CustomWidthArray |
getValueStoringArray(int ngramOrder) |
void |
handleNgramsFinished(int justFinishedOrder) |
void |
initWithLengths(java.util.List<java.lang.Long> numNGrams) |
boolean |
isReversed() |
long |
put(int[] ngram,
int startPos,
int endPos,
T val) |
long |
putWithOffset(int[] ngram,
int startPos,
int endPos,
long contextOffset,
T val)
Warning: does not rehash if the load factor is exceeded; callers must invoke
rehashIfNecessary explicitly.
|
long |
putWithOffsetAndSuffix(int[] ngram,
int startPos,
int endPos,
long contextOffset,
long suffixOffset,
T val)
Warning: does not rehash if the load factor is exceeded; callers must invoke
rehashIfNecessary explicitly.
|
void |
rehashIfNecessary(int num) |
void |
trim() |
boolean |
wordHasBigrams(int word) |
combineToKey, containsOutOfVocab, contextOffsetOf, equals, getSubArray, getValues, wordOf
public static <T> HashNgramMap<T> createImplicitWordHashNgramMap(ValueContainer<T> values, ConfigOptions opts, LongArray[] numNgramsForEachWord, boolean reversed)
public static <T> HashNgramMap<T> createExplicitWordHashNgramMap(ValueContainer<T> values, ConfigOptions opts, int maxNgramOrder, boolean reversed)
T
- values
- opts
- maxNgramOrder
- reversed
- public long put(int[] ngram, int startPos, int endPos, T val)
public long putWithOffset(int[] ngram, int startPos, int endPos, long contextOffset, T val)
ngram
- startPos
- endPos
- contextOffset
- val
- public long putWithOffsetAndSuffix(int[] ngram, int startPos, int endPos, long contextOffset, long suffixOffset, T val)
ngram
- startPos
- endPos
- contextOffset
- val
- public void rehashIfNecessary(int num)
public long getValueAndOffset(long contextOffset, int contextOrder, int word, T outputVal)
getValueAndOffset
in interface NgramMap<T>
public long getOffset(long contextOffset, int contextOrder, int word)
getOffset
in interface ContextEncodedNgramMap<T>
public int[] getNgramFromContextEncoding(long contextOffset, int contextOrder, int word)
getNgramFromContextEncoding
in interface ContextEncodedNgramMap<T>
public int getNextWord(long offset, int ngramOrder)
public long getNextContextOffset(long offset, int ngramOrder)
public int getFirstWordForOffset(long offset, int ngramOrder)
public int getLastWordForOffset(long offset, int ngramOrder)
public int[] getNgramForOffset(long offset, int ngramOrder)
public int[] getNgramForOffset(long offset, int ngramOrder, int[] ret)
public ContextEncodedNgramLanguageModel.LmContextInfo getOffsetForNgram(int[] ngram, int startPos, int endPos)
getOffsetForNgram
in interface ContextEncodedNgramMap<T>
public long getOffsetForNgramInModel(int[] ngram, int startPos, int endPos)
Like getOffsetForNgram(int[], int, int), but assumes that the
full n-gram is in the map (i.e. does not back off to the largest suffix
which is in the model).
ngram
- startPos
- endPos
- public void handleNgramsFinished(int justFinishedOrder)
handleNgramsFinished
in interface NgramMap<T>
public void initWithLengths(java.util.List<java.lang.Long> numNGrams)
initWithLengths
in interface NgramMap<T>
public long getPrefixOffset(long offset, int ngramOrder)
offset
- public int getMaxNgramOrder()
getMaxNgramOrder
in interface NgramMap<T>
public long getNumNgrams(int ngramOrder)
getNumNgrams
in interface NgramMap<T>
public java.lang.Iterable<NgramMap.Entry<T>> getNgramsForOrder(int ngramOrder)
getNgramsForOrder
in interface NgramMap<T>
public java.lang.Iterable<java.lang.Long> getNgramOffsetsForOrder(int ngramOrder)
public boolean isReversed()
public boolean wordHasBigrams(int word)
wordHasBigrams
in interface ContextEncodedNgramMap<T>
public boolean contains(int[] ngram, int startPos, int endPos)
public long getTotalSize()
public CustomWidthArray getValueStoringArray(int ngramOrder)
getValueStoringArray
in interface NgramMap<T>
public void clearStorage()
clearStorage
in interface NgramMap<T>