Tabnine Logo
Word
Code Index | Add Tabnine to your IDE (free)

How to use
Word
in
com.chenlb.mmseg4j

Best Java code snippets using com.chenlb.mmseg4j.Word (Showing top 20 results out of 315)

origin: looly/hutool

/**
 * Returns the start offset reported by the wrapped mmseg4j word.
 *
 * @return the value of {@code word.getStartOffset()}
 */
@Override
public int getStartOffset() {
  final int startOffset = this.word.getStartOffset();
  return startOffset;
}
 
origin: looly/hutool

/**
 * Returns the text of the wrapped mmseg4j word.
 *
 * @return the value of {@code word.getString()}
 */
@Override
public String getText() {
  final String text = word.getString();
  return text;
}
 
origin: looly/hutool

/**
 * Returns the end offset reported by the wrapped mmseg4j word.
 *
 * @return the value of {@code word.getEndOffset()}
 */
@Override
public int getEndOffset() {
  final int endOffset = this.word.getEndOffset();
  return endOffset;
}
origin: com.chenlb.mmseg4j/mmseg4j-solr

  /**
   * Pulls the next word from {@code mmSeg} and copies it into the token attributes.
   *
   * <p>Attributes are cleared first. When a word is available, its characters, start/end
   * offsets and type are written to {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} if a word was produced, {@code false} once {@code mmSeg.next()}
   *     returns {@code null}
   * @throws IOException declared for the calls made here; the visible code does not throw it directly
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    final Word next = mmSeg.next();
    if (next == null) {
      // Segmenter is exhausted: nothing to publish.
      return false;
    }
    termAtt.copyBuffer(next.getSen(), next.getWordOffset(), next.getLength());
    offsetAtt.setOffset(next.getStartOffset(), next.getEndOffset());
    typeAtt.setType(next.getType());
    return true;
  }
}
origin: medcl/elasticsearch-analysis-mmseg

// Excerpt (truncated on the source page): words shorter than three characters are kept
// as-is; longer words are re-split into two-character and single-character words.
Word word = chunk.words[i];
if(word.getLength() < 3) {
  cks.add(word);
} else {
  char[] chs = word.getSen();
  int offset = word.getWordOffset(), n = 0, wordEnd = word.getWordOffset()+word.getLength();
  int senStartOffset = word.getStartOffset() - offset;	// position of sen within the file
  int end = -1;	// position found last time (end of the most recently added word)
  for(; offset<wordEnd-1; offset++) {
    // NOTE(review): search(...) is defined elsewhere; presumably a dictionary lookup for the
    // two characters starting at offset, returning > -1 on a hit - confirm.
    int idx = search(chs, offset, 1);
    if(idx > -1) {
      // Hit: add a two-character word starting at offset.
      cks.add(new Word(chs, senStartOffset, offset, 2));
      end = offset+2;
      n++;
    } else if(offset >= end) {	// there is a single character not covered by a previous match
      cks.add(new Word(chs, senStartOffset, offset, 1));
      end = offset+1;
    cks.add(new Word(chs, senStartOffset, offset, 1));
origin: medcl/elasticsearch-analysis-mmseg

/**
 * Builds a new {@link String} from the slice of the sentence buffer that holds this word.
 *
 * @return the {@code getLength()} characters of {@code getSen()} starting at
 *     {@code getWordOffset()}
 */
public String getString() {
  final char[] sentence = getSen();
  final int start = getWordOffset();
  final int count = getLength();
  return new String(sentence, start, count);
}
 
origin: com.chenlb.mmseg4j/mmseg4j-core

/**
 * Returns the end offset of this word.
 *
 * @return {@code getStartOffset() + getLength()}
 */
public int getEndOffset() {
  final int start = getStartOffset();
  final int length = getLength();
  return start + length;
}
public int getDegree() {
origin: medcl/elasticsearch-analysis-mmseg

/**
 * Word length: the summed {@code getLength()} of every non-null entry in {@code words}.
 *
 * <p>The value is computed only while {@code len} is negative and is then cached in
 * {@code len}; the same pass stores the number of non-null entries in {@code count}.
 *
 * @return the cached or freshly computed total length
 */
public int getLen() {
  if (len >= 0) {
    // Already computed on an earlier call.
    return len;
  }
  int totalLength = 0;
  int nonNullWords = 0;
  for (Word current : words) {
    if (current == null) {
      continue;
    }
    totalLength += current.getLength();
    nonNullWords++;
  }
  count = nonNullWords;
  len = totalLength;
  return len;
}
 
origin: medcl/elasticsearch-analysis-mmseg

/**
 * Creates a {@code Word} of the given type from the buffered sentence.
 *
 * @param bufSentence buffer whose content is converted with {@code toChars}
 * @param startIdx start index passed through to the {@code Word} constructor
 * @param type word type passed through to the {@code Word} constructor
 * @return the newly created word
 */
private Word createWord(StringBuilder bufSentence, int startIdx, String type) {
  final char[] sentenceChars = toChars(bufSentence);
  return new Word(sentenceChars, startIdx, type);
}
 
origin: medcl/elasticsearch-analysis-mmseg

/**
 * Builds a {@code Chunk} holding up to three words taken from {@code chs}.
 *
 * <p>Slot {@code i} is filled only when {@code offsets[i]} lies inside {@code chs}; the word
 * then starts at {@code offsets[i]} and has length {@code tailLen[i] + 1}.
 *
 * @param sen sentence supplying the start offset for every created word
 * @param chs character buffer the words refer to
 * @param tailLen per-slot tail length (word length minus one)
 * @param offsets per-slot offset of the word inside {@code chs}
 * @param cns per-slot character node; may contain {@code null} entries
 * @return the populated chunk
 */
private Chunk createChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns) {
  Chunk chunk = new Chunk();

  for (int slot = 0; slot < 3; slot++) {
    int wordOffset = offsets[slot];
    if (wordOffset >= chs.length) {
      continue;
    }
    Word created = new Word(chs, sen.getStartOffset(), wordOffset, tailLen[slot] + 1);
    chunk.words[slot] = created;
    if (tailLen[slot] != 0) {
      continue;
    }
    // Single-character words take the character frequency, which is used to compute the
    // degree of freedom.
    CharNode node = cns[slot];
    if (node != null) {
      created.setDegree(node.getFreq());
    }
  }
  return chunk;
}
 
origin: com.chenlb.mmseg4j/mmseg4j-core

/**
 * Sum of Degree of Morphemic Freedom of One-Character.
 *
 * <p>Adds up {@code getDegree()} for every non-null entry of {@code words} whose degree is
 * greater than -1. The result is computed only while {@code sumDegree} is negative and is
 * then cached in {@code sumDegree}.
 *
 * @return the cached or freshly computed sum
 */
public int getSumDegree() {
  if (sumDegree >= 0) {
    // Already computed on an earlier call.
    return sumDegree;
  }
  int total = 0;
  for (Word candidate : words) {
    if (candidate == null || candidate.getDegree() <= -1) {
      continue;
    }
    total += candidate.getDegree();
  }
  sumDegree = total;
  return sumDegree;
}
 
origin: chenlb/mmseg4j-solr

  /**
   * Pulls the next word from {@code mmSeg} and copies it into the token attributes.
   *
   * <p>Attributes are cleared first. When a word is available, its characters, start/end
   * offsets and type are written to {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} if a word was produced, {@code false} once {@code mmSeg.next()}
   *     returns {@code null}
   * @throws IOException declared for the calls made here; the visible code does not throw it directly
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    final Word next = mmSeg.next();
    if (next == null) {
      // Segmenter is exhausted: nothing to publish.
      return false;
    }
    termAtt.copyBuffer(next.getSen(), next.getWordOffset(), next.getLength());
    offsetAtt.setOffset(next.getStartOffset(), next.getEndOffset());
    typeAtt.setType(next.getType());
    return true;
  }
}
origin: chenlb/mmseg4j-core

// Excerpt (truncated on the source page): words shorter than three characters are kept
// as-is; longer words are re-split into two-character and single-character words.
Word word = chunk.words[i];
if(word.getLength() < 3) {
  cks.add(word);
} else {
  char[] chs = word.getSen();
  int offset = word.getWordOffset(), n = 0, wordEnd = word.getWordOffset()+word.getLength();
  int senStartOffset = word.getStartOffset() - offset;	// position of sen within the file
  int end = -1;	// position found last time (end of the most recently added word)
  for(; offset<wordEnd-1; offset++) {
    // NOTE(review): search(...) is defined elsewhere; presumably a dictionary lookup for the
    // two characters starting at offset, returning > -1 on a hit - confirm.
    int idx = search(chs, offset, 1);
    if(idx > -1) {
      // Hit: add a two-character word starting at offset.
      cks.add(new Word(chs, senStartOffset, offset, 2));
      end = offset+2;
      n++;
    } else if(offset >= end) {	// there is a single character not covered by a previous match
      cks.add(new Word(chs, senStartOffset, offset, 1));
      end = offset+1;
    cks.add(new Word(chs, senStartOffset, offset, 1));
origin: chenlb/mmseg4j-core

/**
 * Builds a new {@link String} from the slice of the sentence buffer that holds this word.
 *
 * @return the {@code getLength()} characters of {@code getSen()} starting at
 *     {@code getWordOffset()}
 */
public String getString() {
  final char[] sentence = getSen();
  final int start = getWordOffset();
  final int count = getLength();
  return new String(sentence, start, count);
}
 
origin: chenlb/mmseg4j-core

/**
 * Returns the end offset of this word.
 *
 * @return {@code getStartOffset() + getLength()}
 */
public int getEndOffset() {
  final int start = getStartOffset();
  final int length = getLength();
  return start + length;
}
public int getDegree() {
origin: chenlb/mmseg4j-core

/**
 * Word length: the summed {@code getLength()} of every non-null entry in {@code words}.
 *
 * <p>The value is computed only while {@code len} is negative and is then cached in
 * {@code len}; the same pass stores the number of non-null entries in {@code count}.
 *
 * @return the cached or freshly computed total length
 */
public int getLen() {
  if (len >= 0) {
    // Already computed on an earlier call.
    return len;
  }
  int totalLength = 0;
  int nonNullWords = 0;
  for (Word current : words) {
    if (current == null) {
      continue;
    }
    totalLength += current.getLength();
    nonNullWords++;
  }
  count = nonNullWords;
  len = totalLength;
  return len;
}
 
origin: chenlb/mmseg4j-core

/**
 * Creates a {@code Word} of the given type from the buffered sentence.
 *
 * @param bufSentence buffer whose content is converted with {@code toChars}
 * @param startIdx start index passed through to the {@code Word} constructor
 * @param type word type passed through to the {@code Word} constructor
 * @return the newly created word
 */
private Word createWord(StringBuilder bufSentence, int startIdx, String type) {
  final char[] sentenceChars = toChars(bufSentence);
  return new Word(sentenceChars, startIdx, type);
}
 
origin: com.chenlb.mmseg4j/mmseg4j-core

/**
 * Builds a {@code Chunk} holding up to three words taken from {@code chs}.
 *
 * <p>Slot {@code i} is filled only when {@code offsets[i]} lies inside {@code chs}; the word
 * then starts at {@code offsets[i]} and has length {@code tailLen[i] + 1}.
 *
 * @param sen sentence supplying the start offset for every created word
 * @param chs character buffer the words refer to
 * @param tailLen per-slot tail length (word length minus one)
 * @param offsets per-slot offset of the word inside {@code chs}
 * @param cns per-slot character node; may contain {@code null} entries
 * @return the populated chunk
 */
private Chunk createChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns) {
  Chunk chunk = new Chunk();

  for (int slot = 0; slot < 3; slot++) {
    int wordOffset = offsets[slot];
    if (wordOffset >= chs.length) {
      continue;
    }
    Word created = new Word(chs, sen.getStartOffset(), wordOffset, tailLen[slot] + 1);
    chunk.words[slot] = created;
    if (tailLen[slot] != 0) {
      continue;
    }
    // Single-character words take the character frequency, which is used to compute the
    // degree of freedom.
    CharNode node = cns[slot];
    if (node != null) {
      created.setDegree(node.getFreq());
    }
  }
  return chunk;
}
 
origin: medcl/elasticsearch-analysis-mmseg

/**
 * Sum of Degree of Morphemic Freedom of One-Character.
 *
 * <p>Adds up {@code getDegree()} for every non-null entry of {@code words} whose degree is
 * greater than -1. The result is computed only while {@code sumDegree} is negative and is
 * then cached in {@code sumDegree}.
 *
 * @return the cached or freshly computed sum
 */
public int getSumDegree() {
  if (sumDegree >= 0) {
    // Already computed on an earlier call.
    return sumDegree;
  }
  int total = 0;
  for (Word candidate : words) {
    if (candidate == null || candidate.getDegree() <= -1) {
      continue;
    }
    total += candidate.getDegree();
  }
  sumDegree = total;
  return sumDegree;
}
 
origin: medcl/elasticsearch-analysis-mmseg

  /**
   * Pulls the next word from the segmenter returned by {@code mmSeg.get()} and copies it
   * into the token attributes.
   *
   * <p>Attributes are cleared first. When a word is available, its characters, start/end
   * offsets and type are written to {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   * When no word is left, {@code end()} is invoked before returning.
   *
   * @return {@code true} if a word was produced, {@code false} once the segmenter returns
   *     {@code null}
   * @throws IOException declared for the calls made here; the visible code does not throw it directly
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    final Word next = mmSeg.get().next();
    if (next == null) {
      end();
      return false;
    }
    // copyBuffer is the Lucene 3.1 API; Lucene 3.0 used setTermBuffer for the same purpose.
    termAtt.copyBuffer(next.getSen(), next.getWordOffset(), next.getLength());
    offsetAtt.setOffset(next.getStartOffset(), next.getEndOffset());
    typeAtt.setType(next.getType());
    return true;
  }
}
com.chenlb.mmseg4j.Word

Javadoc

类似 lucene 的 token

Most used methods

  • getStartOffset
    此 word 在整个文本中的偏移位置
  • getString
  • getEndOffset
  • getLength
  • getSen
  • getWordOffset
    词在 char[] sen 的偏移位置
  • getType
  • <init>
  • getDegree
  • setDegree

Popular in Java

  • Updating database using SQL prepared statement
  • getOriginalFilename (MultipartFile)
    Return the original filename in the client's filesystem. This may contain path information depending
  • runOnUiThread (Activity)
  • getSharedPreferences (Context)
  • EOFException (java.io)
    Thrown when a program encounters the end of a file or stream during an input operation.
  • SocketException (java.net)
    This SocketException may be thrown during socket creation or setting options, and is the superclass
  • UnknownHostException (java.net)
    Thrown when a hostname can not be resolved.
  • DateFormat (java.text)
    Formats or parses dates and times. This class provides factories for obtaining instances configured f
  • NoSuchElementException (java.util)
    Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
  • Reflections (org.reflections)
    Reflections one-stop-shop object. Reflections scans your classpath, indexes the metadata, allows you t
  • Best plugins for Eclipse
Tabnine Logo
  • Products

    Search for Java code | Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA | WebStorm | Visual Studio | Android Studio | Eclipse | Visual Studio Code | PyCharm | Sublime Text | PhpStorm | Vim | GoLand | RubyMine | Emacs | Jupyter Notebook | Jupyter Lab | Rider | DataGrip | AppCode
  • Company

    About Us | Contact Us | Careers
  • Resources

    FAQ | Blog | Tabnine Academy | Terms of use | Privacy policy | Java Code Index | Javascript Code Index
Get Tabnine for your IDE now