/**
 * Counts how many entries of {@code subwords} are present in {@code set}.
 * Every list entry is checked individually, so a string that appears
 * several times in the list is counted once per occurrence.
 *
 * @param set      the set that is probed
 * @param subwords the strings to look up
 * @return the number of list entries found in {@code set}
 */
public static int isContain(THashSet<String> set, ArrayList<String> subwords) {
    int hits = 0;
    final int total = subwords.size();
    for (int idx = 0; idx < total; idx++) {
        final String candidate = subwords.get(idx);
        if (set.contains(candidate)) {
            hits += 1;
        }
    }
    return hits;
}
/**
 * Checks whether the given word is present in {@code dict}.
 * Words longer than {@code maxLen} are rejected without a lookup.
 *
 * @param word the word to look up
 * @return {@code true} if the word is at most {@code maxLen} characters long
 *         and {@code dict} contains it, {@code false} otherwise
 */
public boolean contains(String word) {
    final boolean withinLimit = word.length() <= maxLen;
    return withinLimit && dict.contains(word);
}
}
/**
 * Tests the set to determine if all of the elements in
 * <tt>collection</tt> are present.
 *
 * @param collection a <code>Collection</code> value
 * @return true if all elements were present in the set.
 */
public boolean containsAll(Collection<?> collection) {
    // Enhanced for avoids the raw Iterator type and the unchecked warning it caused.
    for (Object element : collection) {
        if (!contains(element)) {
            return false;
        }
    }
    return true;
}
/** * * @param toks * @param j * @return */ public int calcOOV(String[] toks, int j) { int count = 0; for(int i=0;i<toks.length;i++){//取得包含新词的最长子串 if(Chars.isLetterOrDigitOrPunc(toks[i])) continue; if(toks[i].length()>j) continue; if(!dict.contains(toks[i])&&!tempdict.contains(toks[i])){ count++; } } return count; } }
/** * 读取不带词频的字典 * @param dict * @param string * @param file * @throws IOException */ public void readDictionary(String path, String suffix) throws IOException { List<File> dicts = MyFiles.getAllFiles(path, suffix); for(File fdict: dicts){ BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(fdict),"utf8")); String line = null; while ((line = bfr.readLine()) != null) { line = line.replaceAll("(\\s| | |\\t)+", ""); if(line.length()==0||set.contains(line)) continue; set.add(line); } bfr.close(); } } /**
/**
 * Reads dictionary files whose lines carry a word followed by further
 * columns (a word frequency, according to the method name).
 * Only the first column of each line is used; it is added to {@code set}
 * when it is non-empty and not yet known.
 *
 * @param path   location handed to {@code MyFiles.getAllFiles}
 * @param suffix file suffix handed to {@code MyFiles.getAllFiles}
 * @throws IOException if a file cannot be opened or read
 */
public void readDictionaryWithFrequency(String path, String suffix) throws IOException {
    List<File> dicts = MyFiles.getAllFiles(path, suffix);
    for (File fdict : dicts) {
        BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(fdict), "utf8"));
        try {
            String line;
            while ((line = bfr.readLine()) != null) {
                // NOTE(review): the two middle alternatives of this pattern may be
                // non-ASCII space characters in the original file - keep them as they are.
                String[] toks = line.split("(\\s| | |\\t)+");
                // A line consisting only of separators splits into an empty array;
                // skip it instead of failing on toks[0].
                if (toks.length == 0) {
                    continue;
                }
                String word = toks[0];
                if (word.length() == 0 || set.contains(word)) {
                    continue;
                }
                set.add(word);
            }
        } finally {
            // Close the reader even when reading fails, so the file handle is not leaked.
            bfr.close();
        }
    }
}
/**
 * Loads words from a word-list file into {@code set}, one word length at a time.
 * The file is re-read once for every length from {@code minLen} to
 * {@code maxLen}; in each pass only words of exactly that length are
 * considered. A word is skipped when more than one of the strings returned by
 * {@code getAllSubWords(w, 2, 3)} is already in {@code set}, or when the word
 * itself is already present.
 *
 * @param dict   path of the word-list file
 * @param minLen shortest word length to load
 * @param maxLen longest word length to load
 * @param name   format name; for {@code "sougou"} lines with fewer than three
 *               columns are ignored
 * @throws IOException if the file cannot be opened or read
 */
public void readSougou(String dict, int minLen, int maxLen, String name) throws IOException {
    for (int len = minLen; len <= maxLen; len++) {
        BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(dict), "utf8"));
        try {
            String line;
            while ((line = bfr.readLine()) != null) {
                String[] words = line.split("\\s+");
                if (name.equals("sougou") && words.length < 3) {
                    continue;
                } else if (words.length < 1) {
                    continue;
                }
                String w = words[0];
                // NOTE(review): the two middle alternatives of this pattern may be
                // non-ASCII space characters in the original file - keep them as they are.
                w = w.replaceAll("(\\s| | |\\t)+", "");
                if (w.contains(" ")) {
                    System.out.println();
                }
                if (w.length() != len) {
                    continue;
                }
                ArrayList<String> subwords = getAllSubWords(w, 2, 3);
                if (subwords != null) {
                    int c = MyCollection.isContain(set, subwords);
                    if (c > 1) {
                        continue;
                    }
                }
                if (set.contains(w)) {
                    continue;
                }
                set.add(w);
            }
        } finally {
            // Close the reader even when reading fails, so the file handle is not leaked.
            bfr.close();
        }
    }
}
continue; if(!dict.contains(toks[i])&&!tempdict.contains(toks[i])){ nowords += "" + toks[i]; count++; while(it.hasNext()){ String s = it.next(); if(nodict.contains(s)) continue; System.out.println("搜索: "+s); System.out.println(sset); tempdict.addAll(sset); if(!sset.contains(s)&&!nodict.contains(s)){ nodict.add(s); bwNo.write(s);
/**
 * Computes the ratio of shared elements to distinct elements of two sets:
 * {@code common / (|s1| + |s2| - common)}, where {@code common} is the number
 * of elements of {@code s1} that {@code s2} also contains.
 * When both sets are empty the division is {@code 0f / 0} and the result is NaN.
 *
 * @param s1 first set, may be {@code null}
 * @param s2 second set, may be {@code null}
 * @return the ratio described above, or {@code 0} if either set is {@code null}
 */
public float calc(THashSet<Object> s1, THashSet<Object> s2) {
    if (s1 == null || s2 == null) {
        return 0;
    }
    int shared = 0;
    for (Object element : s1) {
        if (s2.contains(element)) {
            shared++;
        }
    }
    final int unionSize = s1.size() + s2.size() - shared;
    return ((float) shared) / unionSize;
}
}
/**
 * Reports whether {@code obj} is contained, by forwarding the question
 * to {@code delegate}.
 *
 * @param obj the object to look for
 * @return whatever {@code delegate.contains(obj)} returns
 */
@Override
public boolean contains(Object obj) {
    final boolean present = delegate.contains(obj);
    return present;
}
/**
 * Reports whether {@code obj} is contained, by forwarding the question
 * to {@code delegate}.
 *
 * @param obj the object to look for
 * @return whatever {@code delegate.contains(obj)} returns
 */
@Override
public boolean contains(Object obj) {
    final boolean present = delegate.contains(obj);
    return present;
}
/**
 * Tests the set to determine if all of the elements in
 * <tt>collection</tt> are present.
 *
 * @param collection a <code>Collection</code> value
 * @return true if all elements were present in the set.
 */
@SuppressWarnings("ForLoopReplaceableByForEach")
public boolean containsAll(Collection<?> collection) {
    // Iterator<?> instead of the raw Iterator type: same behavior, no raw-type warning.
    for (Iterator<?> i = collection.iterator(); i.hasNext();) {
        if (!contains(i.next())) {
            return false;
        }
    }
    return true;
}
/**
 * Tests the set to determine if all of the elements in
 * <tt>collection</tt> are present.
 *
 * @param collection a <code>Collection</code> value
 * @return true if all elements were present in the set.
 */
@Override
public boolean containsAll(Collection<?> collection) {
    // Enhanced for avoids the raw Iterator type and the unchecked warning it caused.
    for (Object element : collection) {
        if (!contains(element)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests the set to determine if all of the elements in
 * <tt>collection</tt> are present.
 *
 * @param collection a <code>Collection</code> value
 * @return true if all elements were present in the set.
 */
public boolean containsAll(Collection<?> collection) {
    // Enhanced for avoids the raw Iterator type and the unchecked warning it caused.
    for (Object element : collection) {
        if (!contains(element)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests the set to determine if all of the elements in
 * <tt>collection</tt> are present.
 *
 * @param collection a <code>Collection</code> value
 * @return true if all elements were present in the set.
 */
@SuppressWarnings("ForLoopReplaceableByForEach")
public boolean containsAll( Collection<?> collection ) {
    // Iterator<?> instead of the raw Iterator type: same behavior, no raw-type warning.
    for ( Iterator<?> i = collection.iterator(); i.hasNext(); ) {
        if ( !contains( i.next() ) ) {
            return false;
        }
    }
    return true;
}
/**
 * Checks whether {@code set} holds the value that {@code getMd5String}
 * produces for the given string (presumably its MD5 digest - confirm
 * against {@code getMd5String}).
 *
 * @param string the string to look up, may be {@code null}
 * @return {@code false} for {@code null}; otherwise the result of looking up
 *         {@code getMd5String(string)} in {@code set}
 */
public boolean contains(String string) {
    return string != null && set.contains(getMd5String(string));
}
/**
 * Lists the dictionaries that contain the text of the given phrase.
 * The result is a concatenation of short dictionary names (the part of the
 * entry in {@code dictNames} after the last {@code '/'}), each followed by
 * {@code ","} for an exact match in {@code dictionaries} or by
 * {@code "(IC),"} for a match of the lower-cased text in
 * {@code dictionariesIgnoreCase}.
 *
 * @param phrase the phrase whose {@code toString()} text is looked up
 * @return the concatenated markers, or an empty string if nothing matched
 */
public String annotatePhrase(Constituent phrase){
    String expression = phrase.toString();
    StringBuilder ret = new StringBuilder();
    for (int i = 0; i < dictionaries.size(); i++) {
        if (dictionaries.get(i).contains(expression)) {
            String[] parts = dictNames.get(i).split("/");
            ret.append(parts[parts.length - 1]).append(",");
        }
    }
    for (int i = 0; i < dictionariesIgnoreCase.size(); i++) {
        // Query the ignore-case dictionaries here: the loop is bounded by their
        // size, and the original indexed the case-sensitive list by mistake.
        if (dictionariesIgnoreCase.get(i).contains(expression.toLowerCase())) {
            String[] parts = dictNames.get(i).split("/");
            ret.append(parts[parts.length - 1]).append("(IC),");
        }
    }
    return ret.toString();
}
}
/**
 * Lists the dictionaries that contain the text of the given phrase.
 * The result is a concatenation of short dictionary names (the part of the
 * entry in {@code dictNames} after the last {@code '/'}), each followed by
 * {@code ","} for an exact match in {@code dictionaries} or by
 * {@code "(IC),"} for a match of the lower-cased text in
 * {@code dictionariesIgnoreCase}.
 *
 * @param phrase the phrase whose {@code toString()} text is looked up
 * @return the concatenated markers, or an empty string if nothing matched
 */
public String annotatePhrase(Constituent phrase){
    String expression = phrase.toString();
    StringBuilder ret = new StringBuilder();
    for (int i = 0; i < dictionaries.size(); i++) {
        if (dictionaries.get(i).contains(expression)) {
            String[] parts = dictNames.get(i).split("/");
            ret.append(parts[parts.length - 1]).append(",");
        }
    }
    for (int i = 0; i < dictionariesIgnoreCase.size(); i++) {
        // Query the ignore-case dictionaries here: the loop is bounded by their
        // size, and the original indexed the case-sensitive list by mistake.
        if (dictionariesIgnoreCase.get(i).contains(expression.toLowerCase())) {
            String[] parts = dictNames.get(i).split("/");
            ret.append(parts[parts.length - 1]).append("(IC),");
        }
    }
    return ret.toString();
}
}