/**
 * Reads the feature mapping from a stream.
 *
 * The stream is expected to hold the number of feature classes, and for each
 * class its name (UTF), the number of entries, and then that many
 * (UTF key, int value) pairs. Each class is stored in the feature set, its
 * entry count is stored in the feature counter, and the bit widths are
 * recomputed once everything has been read.
 *
 * @param din the stream to read from
 * @throws IOException if reading from the stream fails
 */
public void read(DataInputStream din) throws IOException {
    final int classCount = din.readInt();

    int classIndex = 0;
    while (classIndex < classCount) {
        final String className = din.readUTF();
        final int entryCount = din.readInt();

        // Register the (still empty) value map first, then fill it.
        HashMap<String,Integer> values = new HashMap<String,Integer>();
        getFeatureSet().put(className, values);

        for (int remaining = entryCount; remaining > 0; remaining--) {
            String valueName = din.readUTF();
            int valueIndex = din.readInt();
            values.put(valueName, valueIndex);
        }

        getFeatureCounter().put(className, entryCount);
        classIndex++;
    }

    calculateBits();
}
@Override public String toString() { StringBuffer content = new StringBuffer(); for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ content.append(e.getKey()+" "+e.getValue()); content.append(':'); // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); content.append(getFeatureBits(e.getKey())); /*if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) { content.append(e2.getKey()+" ("+e2.getValue()+") "); }*/ content.append('\n'); } return content.toString(); }
/**
 * Classifies a single word and returns the candidate part-of-speech tags as strings.
 *
 * The candidates come from {@code pipe.classify(...)}; each candidate's index
 * {@code p.p} is translated to its tag string through the reversed POS feature map.
 *
 * @param is            the instances that contain the word
 * @param instanceIndex index of the instance (sentence) inside {@code is}
 * @param word          index of the word inside the instance
 * @param wordForm      surface form of the word
 * @return the tag strings of the candidates, in the order returned by the classifier;
 *         candidates whose index cannot be resolved are skipped
 */
public ArrayList<String> tagStrings(InstancesTagger is, int instanceIndex, int word, String wordForm) {

    ArrayList<POS> plist = pipe.classify(wordForm, params, word, is, instanceIndex, is.pposs[instanceIndex], li);

    String pos[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.POS));

    // The list used to be initialised to null, so every add() threw a
    // NullPointerException (swallowed below) and the method always returned null.
    ArrayList<String> postags = new ArrayList<String>(plist.size());

    for (POS p : plist) {
        try {
            postags.add(pos[p.p]);
        } catch (Exception e) {
            // Best effort, as before: report an unresolvable candidate and keep going.
            e.printStackTrace();
        }
    }
    return postags;
}
/**
 * Registers a value for a feature class, creating the class on first use.
 *
 * A newly created class starts with the single entry {@code NONE -> 0} and a
 * counter of 1. A value that is not yet known receives the current counter as
 * its index and the counter is incremented; a known value keeps its index.
 *
 * @param a name of the feature class
 * @param v value to register within that class
 * @return the index assigned to {@code v} within class {@code a}
 */
final public int register(String a, String v) {

    HashMap<String,Integer> values = getFeatureSet().get(a);
    if (values == null) {
        values = new HashMap<String,Integer>();
        getFeatureSet().put(a, values);
        values.put(NONE, 0);
        getFeatureCounter().put(a, 1);
    }

    Integer next = getFeatureCounter().get(a);
    Integer known = values.get(v);

    if (known != null) {
        return known;
    }

    // Unknown value: hand out the next free index and advance the counter.
    values.put(v, next);
    getFeatureCounter().put(a, next + 1);
    return next;
}
mf.register(POS,"<root-POS>"); mf.register(WORD,"<root>"); for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase()); for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); mf.register(POS, w[i1]); HashSet<Integer> ps = pps.get(mf.getValue(POS,w[i1])); if (ps==null) { ps= new HashSet<Integer>(); pps.put(mf.getValue(POS,w[i1]), ps); if (i1+1<w.length) ps.add(mf.getValue(POS,w[i1+1])); System.out.println("words in corpus "+(corpusWrds=mf.getFeatureCounter().get(ExtractorT2.WORD))); if (options.clusterFile==null)cl = new Cluster(); else cl= new Cluster(options.clusterFile, mf,6); mf.calculateBits(); initValues(); System.out.println(""+mf.toString()); is.pposs[num1][k] = (short)mf.getValue(FM, instance1.ppos[k].split("\\|")[1]);
d1.v0 = f++; d1.v2=form2; l=mf.calc3(d1); vs[n++]=mf.calc3(d1); d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1); d3.v0=f++; vs[n++]=mf.calc3(d3); d3.v0=f++; vs[n++]=mf.calc4(d3); d3.v0=f++; vs[n++]=mf.calc5(d3); d3.v0=f++; vs[n++]=mf.calc6(d3); d3.v0=f++; vs[n++]=mf.calc7(d3); d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3); d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); d3.v0 =f++; vs[n++]=mf.calc3(d3); d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l); d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l); d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l); d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l); d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); d3.v0=f+3; d3.v2=lx.getTag(form); vs[n++]=mf.calc3(d3); d3.v0=f+4; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); d3.v0=f+5; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
public void readModel(OptionsSuper options) { try{ pipe = new ExtractorT2(options, mf =new MFO()); _options=options; // load the model ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); zis.getNextEntry(); DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); pipe.mf.read(dis); pipe.initValues(); pipe.initFeatures(); params = new ParametersFloat(0); params.read(dis); li = new Long2Int(params.parameters.length); pipe.read(dis); dis.close(); pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorT2.POS)]; for(Entry<String,Integer> e : pipe.mf.getFeatureSet().get(ExtractorT2.POS).entrySet()) pipe.types[e.getValue()] = e.getKey(); DB.println("Loading data finished. "); } catch(Exception e) { e.printStackTrace(); } }
/**
 * Clears the data: the feature set, the feature-bit table and the feature counters.
 *
 * The feature set used to be cleared twice while the counters were left
 * untouched, so stale counter entries survived a clear.
 */
public void clearData() {
    getFeatureSet().clear();
    m_featureBits.clear();
    getFeatureCounter().clear();
}
/**
 * Looks up the bit widths of the POS, WORD, TYPE and CHAR feature classes in
 * {@code mf} and copies them into the {@code a0..a7} slots of the data
 * objects {@code d1}, {@code d2}, {@code d3}, {@code dw} and {@code dwp}.
 */
public void initValues() {
    // Bit widths per feature class.
    s_pos  = mf.getFeatureBits(POS);
    s_word = mf.getFeatureBits(WORD);
    s_type = mf.getFeatureBits(TYPE);
    s_char = mf.getFeatureBits(CHAR);

    // d1: type, pos, word, word
    d1.a0 = s_type;
    d1.a1 = s_pos;
    d1.a2 = s_word;
    d1.a3 = s_word;

    // d2: type followed by six pos slots
    d2.a0 = s_type;
    d2.a1 = s_pos;
    d2.a2 = s_pos;
    d2.a3 = s_pos;
    d2.a4 = s_pos;
    d2.a5 = s_pos;
    d2.a6 = s_pos;

    // d3: type, pos, then six char slots
    d3.a0 = s_type;
    d3.a1 = s_pos;
    d3.a2 = s_char;
    d3.a3 = s_char;
    d3.a4 = s_char;
    d3.a5 = s_char;
    d3.a6 = s_char;
    d3.a7 = s_char;

    // dw: type, pos, then six word slots
    dw.a0 = s_type;
    dw.a1 = s_pos;
    dw.a2 = s_word;
    dw.a3 = s_word;
    dw.a4 = s_word;
    dw.a5 = s_word;
    dw.a6 = s_word;
    dw.a7 = s_word;

    // dwp: type, pos, word, pos, word
    dwp.a0 = s_type;
    dwp.a1 = s_pos;
    dwp.a2 = s_word;
    dwp.a3 = s_pos;
    dwp.a4 = s_word;
}
/** * Calculates the number of bits needed to encode a feature */ public void calculateBits() { int total=0; for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); m_featureBits.put(e.getKey(), bits); total+=bits; // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } // System.out.println("total number of needed bits "+total); }
public static void main (String[] args) throws FileNotFoundException, Exception { long start = System.currentTimeMillis(); Options options = new Options(args); Tagger tagger = new Tagger(); if (options.train) { // depReader.normalizeOn=false; tagger.li = new Long2Int(options.hsize); tagger.pipe = new ExtractorT2 (options, tagger.mf= new MFO()); //tagger.pipe.li =tagger.li; InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile); tagger.params = new ParametersFloat(tagger.li.size()); tagger.train(options, tagger.pipe,tagger.params,is); tagger.writeModel(options, tagger.pipe, tagger.params); } if (options.test) { tagger.readModel(options); tagger.out(options,tagger.pipe, tagger.params); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); Evaluator.evaluateTagger(options.goldfile, options.outfile,options.format); } long end = System.currentTimeMillis(); System.out.println("used time "+((float)((end-start)/100)/10)); }
String wds[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.WORD));
/**
 * Writes the feature set to the given stream.
 *
 * Layout: the number of feature classes (int), then for each class its name
 * (UTF), the number of entries (int) and every entry as a (UTF key, int value)
 * pair. This is the layout that {@code read(DataInputStream)} consumes.
 *
 * An entry with a {@code null} key is reported through {@code DB.println}
 * before the write is attempted.
 * NOTE(review): the null key is still passed to {@code writeUTF} after
 * logging, which presumably fails — confirm whether such entries should be skipped.
 *
 * @param dos the stream to write to
 * @throws IOException if writing to the stream fails
 */
public void writeData(DataOutputStream dos) throws IOException { dos.writeInt(getFeatureSet().size()); for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { dos.writeUTF(e.getKey()); dos.writeInt(e.getValue().size()); for(Entry<String,Integer> e2 : e.getValue().entrySet()) { if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); dos.writeUTF(e2.getKey()); dos.writeInt(e2.getValue()); } } } public void read(DataInputStream din) throws IOException {
@Override protected Tagger produceResource(URL aUrl) throws IOException { File modelFile = ResourceUtils.getUrlAsFile(aUrl, true); String[] args = { "-model", modelFile.getPath() }; Options option = new Options(args); Tagger tagger = new Tagger(option); // create a POSTagger HashMap<String, HashMap<String, Integer>> featureSet = tagger.mf.getFeatureSet(); SingletonTagset posTags = new SingletonTagset(POS.class, getResourceMetaData() .getProperty("pos.tagset")); HashMap<String, Integer> posTagFeatures = featureSet.get("POS"); posTags.addAll(posTagFeatures.keySet()); posTags.removeAll(asList("<None>", "<root-POS>")); addTagset(posTags); if (printTagSet) { getContext().getLogger().log(INFO, getTagset().toString()); } return tagger; } };