/**
 * Returns the text of the wrapped word.
 *
 * @return the value of {@code word.getString()}
 */
@Override
public String getText() {
    return word.getString();
}
/**
 * Returns the text of the wrapped word.
 *
 * @return the value of {@code word.getString()}
 */
@Override
public String getText() {
    return word.getString();
}
/**
 * Segments {@code text} with MMSeg4j and collects the string of every word it yields.
 *
 * @param text the text to segment
 * @return the set obtained from {@code InsertionOrderUtil.newSet()}, holding each
 *         distinct word string produced by the segmenter
 * @throws HugeException if anything goes wrong while segmenting; the original
 *         exception is passed along as the cause
 */
@Override
public Set<String> segment(String text) {
    Set<String> words = InsertionOrderUtil.newSet();
    MMSeg tokenizer = new MMSeg(new StringReader(text), this.seg);
    try {
        // next() returns null once the input is exhausted.
        for (Word token = tokenizer.next(); token != null; token = tokenizer.next()) {
            words.add(token.getString());
        }
    } catch (Exception e) {
        throw new HugeException("MMSeg4j segment text '%s' failed", e, text);
    }
    return words;
}
}
/**
 * Returns the text of the wrapped word.
 *
 * @return the value of {@code word.getString()}
 */
@Override
public String getText() {
    return word.getString();
}
/**
 * Returns the string form of this object, which is exactly the value of
 * {@code getString()}.
 *
 * @return the result of {@code getString()}
 */
@Override
public String toString() {
    return getString();
}

/**
/**
 * Returns the string form of this object, which is exactly the value of
 * {@code getString()}.
 *
 * @return the result of {@code getString()}
 */
@Override
public String toString() {
    return getString();
}

/**
/**
 * Returns the string form of this object, which is exactly the value of
 * {@code getString()}.
 *
 * @return the result of {@code getString()}
 */
@Override
public String toString() {
    return getString();
}

/**
/**
 * Concatenates the string of every non-null entry in {@code words}, writing an
 * underscore after each one (so a non-empty result ends with {@code '_'}).
 *
 * @return the underscore-terminated word strings, or an empty string when there
 *         are no non-null words
 */
@Override
public String toString() {
    StringBuilder joined = new StringBuilder();
    for (Word entry : words) {
        if (entry == null) {
            continue; // null slots contribute nothing
        }
        joined.append(entry.getString());
        joined.append('_');
    }
    return joined.toString();
}
/**
 * Concatenates the string of every non-null entry in {@code words}, writing an
 * underscore after each one (so a non-empty result ends with {@code '_'}).
 *
 * @return the underscore-terminated word strings, or an empty string when there
 *         are no non-null words
 */
@Override
public String toString() {
    StringBuilder joined = new StringBuilder();
    for (Word entry : words) {
        if (entry == null) {
            continue; // null slots contribute nothing
        }
        joined.append(entry.getString());
        joined.append('_');
    }
    return joined.toString();
}
/**
 * Concatenates the string of every non-null entry in {@code words}, writing an
 * underscore after each one (so a non-empty result ends with {@code '_'}).
 *
 * @return the underscore-terminated word strings, or an empty string when there
 *         are no non-null words
 */
@Override
public String toString() {
    StringBuilder joined = new StringBuilder();
    for (Word entry : words) {
        if (entry == null) {
            continue; // null slots contribute nothing
        }
        joined.append(entry.getString());
        joined.append('_');
    }
    return joined.toString();
}
public String segWords(Reader input, String wordSpilt) throws IOException { StringBuilder sb = new StringBuilder(); Seg seg = getSeg(); //取得不同的分词具体算法 MMSeg mmSeg = new MMSeg(input, seg); Word word = null; boolean first = true; while((word=mmSeg.next())!=null) { if(!first) { sb.append(wordSpilt); } String w = word.getString(); sb.append(w); first = false; } return sb.toString(); }
public String segWords(Reader input, String wordSpilt) throws IOException { StringBuilder sb = new StringBuilder(); Seg seg = getSeg(); //取得不同的分词具体算法 MMSeg mmSeg = new MMSeg(input, seg); Word word = null; boolean first = true; while((word=mmSeg.next())!=null) { if(!first) { sb.append(wordSpilt); } String w = word.getString(); sb.append(w); first = false; } return sb.toString(); }
public List<String> getWords(String document){ List<String> words = new FastList<String>(); Reader sr = new StringReader(document.toString()); this.reset(sr); Word word = null; List<String> identifiedWords = chineseWordIdentifier.getIdentifiedWords(); try { while(true) { word = this.next(); if (word != null){ // MMSeg分出来的非中文词条 words.add(word.getString()); } else if (identifiedWords.size() > 0){ //自己分出来的中文词条 words.addAll(identifiedWords); identifiedWords.clear(); } else { //没词了 break; } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return words; }
/**
 * Segments {@code text} with MMSeg4j and collects the string of every word it yields.
 *
 * @param text the text to segment
 * @return the set obtained from {@code InsertionOrderUtil.newSet()}, holding each
 *         distinct word string produced by the segmenter
 * @throws HugeException if anything goes wrong while segmenting; the original
 *         exception is passed along as the cause
 */
@Override
public Set<String> segment(String text) {
    Set<String> words = InsertionOrderUtil.newSet();
    MMSeg tokenizer = new MMSeg(new StringReader(text), this.seg);
    try {
        // next() returns null once the input is exhausted.
        for (Word token = tokenizer.next(); token != null; token = tokenizer.next()) {
            words.add(token.getString());
        }
    } catch (Exception e) {
        throw new HugeException("MMSeg4j segment text '%s' failed", e, text);
    }
    return words;
}
}
/**
 * Segments the text of {@code aText} and stores the result back on it: the full
 * word sequence via {@code setWords} and the de-duplicated words via
 * {@code setUniqueWords}.
 *
 * <p>Resetting and draining {@code mmSeg} happens while holding the monitor of
 * {@code mmSeg}.
 *
 * @param aText the analyzed text whose content is segmented and whose word
 *              collections are populated
 * @throws RuntimeException if the segmenter raises an {@link IOException}
 */
@Override
void analyzeWords(AnalyzedText aText) {
    FastList<String> allWords = new FastList<String>();
    FastSet<String> distinctWords = new FastSet<String>();
    Reader source = new StringReader(aText.getText());
    synchronized (mmSeg) {
        mmSeg.reset(source);
        try {
            com.chenlb.mmseg4j.Word token = mmSeg.next();
            while (token != null) {
                String text = token.getString();
                allWords.add(text);
                distinctWords.add(text);
                token = mmSeg.next();
            }
        } catch (IOException e) {
            throw new RuntimeException("IOException occurred", e);
        }
    }
    aText.setWords(allWords);
    aText.setUniqueWords(distinctWords);
}