/** * Calculates the number of bits needed to encode a feature */ public void calculateBits() { int total=0; for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); m_featureBits.put(e.getKey(), bits); total+=bits; // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } // System.out.println("total number of needed bits "+total); }
/**
 * Loads the feature mapping from a binary stream and then recomputes the bit
 * widths via {@link #calculateBits()}.
 *
 * Consumed layout: an int holding the number of feature classes, followed for
 * each class by its name (readUTF), an int entry count, and that many pairs of
 * value (readUTF) and index (int). Each class is stored in the feature set and
 * its entry count is stored in the feature counter.
 *
 * @param din stream to read the mapping from
 * @throws IOException if reading from the stream fails
 */
public void read(DataInputStream din) throws IOException {
    final int classCount = din.readInt();
    int classIdx = 0;
    while (classIdx < classCount) {
        final String className = din.readUTF();
        final int entryCount = din.readInt();

        // The map is registered before it is filled, as entries are streamed in.
        final HashMap<String,Integer> values = new HashMap<String,Integer>();
        getFeatureSet().put(className, values);

        for (int remaining = entryCount; remaining > 0; remaining--) {
            final String value = din.readUTF();
            final int index = din.readInt();
            values.put(value, index);
        }

        getFeatureCounter().put(className, entryCount);
        classIdx++;
    }
    calculateBits();
}
@Override public String toString() { StringBuffer content = new StringBuffer(); for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ content.append(e.getKey()+" "+e.getValue()); content.append(':'); // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); content.append(getFeatureBits(e.getKey())); /*if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) { content.append(e2.getKey()+" ("+e2.getValue()+") "); }*/ content.append('\n'); } return content.toString(); }
/**
 * Registers a value for an attribute class and returns the index of that value.
 *
 * If the attribute class is not known yet, it is created with {@code NONE}
 * mapped to index 0 and its counter set to 1. A value that is already
 * registered keeps its index; a new value receives the current counter of the
 * class, and the counter is then increased by one.
 *
 * @param a name of the attribute class
 * @param v value to register within that class
 * @return the index assigned to {@code v} within class {@code a}
 */
final public int register(String a, String v) {
    HashMap<String,Integer> values = getFeatureSet().get(a);
    if (values == null) {
        // First sighting of this class: create it with the NONE placeholder.
        values = new HashMap<String,Integer>();
        getFeatureSet().put(a, values);
        values.put(NONE, 0);
        getFeatureCounter().put(a, 1);
    }

    Integer next = getFeatureCounter().get(a);
    Integer known = values.get(v);
    if (known != null) {
        return known;
    }

    // New value: it takes the current counter as its index.
    values.put(v, next);
    getFeatureCounter().put(a, next + 1);
    return next;
}
public void readModel(OptionsSuper options) { try{ pipe = new ExtractorT2(options, mf =new MFO()); _options=options; // load the model ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); zis.getNextEntry(); DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); pipe.mf.read(dis); pipe.initValues(); pipe.initFeatures(); params = new ParametersFloat(0); params.read(dis); li = new Long2Int(params.parameters.length); pipe.read(dis); dis.close(); pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorT2.POS)]; for(Entry<String,Integer> e : pipe.mf.getFeatureSet().get(ExtractorT2.POS).entrySet()) pipe.types[e.getValue()] = e.getKey(); DB.println("Loading data finished. "); } catch(Exception e) { e.printStackTrace(); } }
System.out.println("words in corpus "+(corpusWrds=mf.getFeatureCounter().get(ExtractorT2.WORD))); if (options.clusterFile==null)cl = new Cluster(); else cl= new Cluster(options.clusterFile, mf,6);