/**
 * Tags the given sentence in place and hands the same object back.
 *
 * @param snt09 the sentence to tag; it is passed to {@code tag} and then returned
 * @return the very same {@code snt09} reference that was passed in
 */
@Override
public SentenceData09 apply(SentenceData09 snt09) {
    // The value returned by tag(...) is intentionally ignored; the argument itself is returned.
    tag(snt09);
    return snt09;
}
/**
 * Looks up the bit widths of the POS, WORD, TYPE and CHAR features from {@code mf}
 * and copies them into the slot widths ({@code a0}..{@code a7}) of the feature
 * encoders {@code d1}, {@code d2}, {@code d3}, {@code dw} and {@code dwp}.
 */
public void initValues() {
    // Bit widths per feature class.
    s_pos = mf.getFeatureBits(POS);
    s_word = mf.getFeatureBits(WORD);
    s_type = mf.getFeatureBits(TYPE);
    s_char = mf.getFeatureBits(CHAR);

    // d1: type, pos, then two word-sized slots.
    d1.a0 = s_type;
    d1.a1 = s_pos;
    d1.a2 = s_word;
    d1.a3 = s_word;

    // d2: type followed by six pos-sized slots.
    d2.a0 = s_type;
    d2.a1 = s_pos;
    d2.a2 = s_pos;
    d2.a3 = s_pos;
    d2.a4 = s_pos;
    d2.a5 = s_pos;
    d2.a6 = s_pos;

    // d3: type, pos, then six char-sized slots.
    d3.a0 = s_type;
    d3.a1 = s_pos;
    d3.a2 = s_char;
    d3.a3 = s_char;
    d3.a4 = s_char;
    d3.a5 = s_char;
    d3.a6 = s_char;
    d3.a7 = s_char;

    // dw: type, pos, then six word-sized slots.
    dw.a0 = s_type;
    dw.a1 = s_pos;
    dw.a2 = s_word;
    dw.a3 = s_word;
    dw.a4 = s_word;
    dw.a5 = s_word;
    dw.a6 = s_word;
    dw.a7 = s_word;

    // dwp: type, pos, word, pos, word.
    dwp.a0 = s_type;
    dwp.a1 = s_pos;
    dwp.a2 = s_word;
    dwp.a3 = s_pos;
    dwp.a4 = s_word;
}
public static void main (String[] args) throws FileNotFoundException, Exception { long start = System.currentTimeMillis(); Options options = new Options(args); Tagger tagger = new Tagger(); if (options.train) { // depReader.normalizeOn=false; tagger.li = new Long2Int(options.hsize); tagger.pipe = new ExtractorT2 (options, tagger.mf= new MFO()); //tagger.pipe.li =tagger.li; InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile); tagger.params = new ParametersFloat(tagger.li.size()); tagger.train(options, tagger.pipe,tagger.params,is); tagger.writeModel(options, tagger.pipe, tagger.params); } if (options.test) { tagger.readModel(options); tagger.out(options,tagger.pipe, tagger.params); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); Evaluator.evaluateTagger(options.goldfile, options.outfile,options.format); } long end = System.currentTimeMillis(); System.out.println("used time "+((float)((end-start)/100)/10)); }
/**
 * Builds a tagger for the given model file by wrapping the file's path in a
 * {@code "-model"} option and passing the resulting options to the constructor.
 *
 * @param modelFile the model file; its {@code toString()} value is used as the model path
 * @return a new tagger constructed from those options
 */
public static Tagger getTagger(File modelFile) {
    final String modelPath = modelFile.toString();
    final is2.tag.Options taggerOptions =
            new is2.tag.Options(new String[] { "-model", modelPath });
    return new Tagger(taggerOptions);
}
/**
 * Tags a single word and returns the candidate part-of-speech tags as strings,
 * in the order delivered by {@code pipe.classify}.
 *
 * @param is            set of sentences
 * @param instanceIndex index of the sentence in question
 * @param word          word position forwarded to the classifier
 * @param wordForm      surface form of the word to tag
 * @return the tag strings for all candidates that could be mapped; never {@code null}
 */
public ArrayList<String> tagStrings(InstancesTagger is,int instanceIndex, int word, String wordForm) {
    ArrayList<POS> plist = pipe.classify(wordForm, params, word, is, instanceIndex, is.pposs[instanceIndex], li);
    String pos[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.POS));

    // The result list must be allocated: it was previously left null, so every add()
    // raised a NullPointerException that was swallowed and the method returned null.
    ArrayList<String> postags = new ArrayList<String>(plist.size());
    for (POS p : plist) {
        try {
            postags.add(pos[p.p]);
        } catch (RuntimeException e) {
            // Best effort: a candidate whose index cannot be mapped is reported and skipped.
            e.printStackTrace();
        }
    }
    return postags;
}
public static Tagger getTagger(File modelFile) { String[] argsT = { "-model", modelFile.toString() }; return new Tagger(modelFile.toString()); // new is2.tag.Options(argsT)); }
/**
 * Reads the feature mapping from a stream and recomputes the feature bit widths.
 * <p>
 * Layout consumed: an int giving the number of feature classes; then per class a UTF
 * string (class name), an int (number of entries) and that many (UTF string, int) pairs.
 *
 * @param din the stream to read from
 * @throws IOException if reading from the stream fails
 */
public void read(DataInputStream din) throws IOException {
    final int classCount = din.readInt();

    for (int c = 0; c < classCount; c++) {
        final String className = din.readUTF();
        final int entryCount = din.readInt();

        // The map is registered before it is filled, as in the stored-format reader contract.
        final HashMap<String,Integer> values = new HashMap<String,Integer>();
        getFeatureSet().put(className, values);

        int remaining = entryCount;
        while (remaining-- > 0) {
            final String value = din.readUTF();
            final int index = din.readInt();
            values.put(value, index);
        }

        getFeatureCounter().put(className, entryCount);
    }

    calculateBits();
}
/**
 * Creates a tagger for a model file name by delegating to the options-based
 * constructor with the arguments {@code -model <modelFileName>}.
 *
 * @param modelFileName the file name of the model
 */
public Tagger(String modelFileName) {
    this(new Options(new String[] { "-model", modelFileName }));
}
/**
 * Registers a value for an attribute class, creating the class on first use.
 * A newly created class starts with {@code NONE} mapped to 0 and a counter of 1.
 *
 * @param a the attribute class
 * @param v the value to register within that class
 * @return the index already assigned to {@code v}, or the freshly assigned index
 *         (the class counter before it is incremented) if {@code v} was unknown
 */
final public int register(String a, String v) {
    HashMap<String,Integer> values = getFeatureSet().get(a);
    if (values == null) {
        values = new HashMap<String,Integer>();
        getFeatureSet().put(a, values);
        values.put(NONE, 0);
        getFeatureCounter().put(a, 1);
    }

    final Integer nextIndex = getFeatureCounter().get(a);
    final Integer existing = values.get(v);
    if (existing != null) {
        return existing;
    }

    // Unknown value: it takes the current counter, and the counter advances by one.
    values.put(v, nextIndex);
    getFeatureCounter().put(a, nextIndex + 1);
    return nextIndex;
}
/**
 * Assigns predicted POS tags to every token of the sentence, writing them into
 * {@code instance.ppos}. Position 0 is set to the root POS; positions 1..length-1
 * are decoded twice, the second sweep running with the tag indices produced by the first.
 *
 * @param is       instance container passed to the extractor (sentence index 0 is used)
 * @param instance the sentence whose {@code ppos} array is overwritten
 */
private void tag(InstancesTagger is, SentenceData09 instance) {
    final int length = instance.ppos.length;
    final short[] pos = new short[instance.gpos.length];
    final float[] sc = new float[length];

    instance.ppos[0] = is2.io.CONLLReader09.ROOT_POS;
    pos[0] = (short) pipe.mf.getValue(ExtractorT2.POS, is2.io.CONLLReader09.ROOT_POS);

    // Two identical left-to-right sweeps over the tokens.
    for (int pass = 0; pass < 2; pass++) {
        for (int j = 1; j < length; j++) {
            final short bestType = (short) pipe.fillFeatureVectorsOne(
                    instance.forms[j], params, j, is, 0, pos, this.li, sc);
            pos[j] = bestType;
            instance.ppos[j] = pipe.types[bestType];
        }
    }
}
/**
 * Clears the stored feature data: the feature sets, the feature bit widths
 * and the feature counters.
 */
public void clearData() {
    getFeatureSet().clear();
    m_featureBits.clear();
    // The feature set was previously cleared a second time here, which left the counters
    // populated; stale counts would then be turned back into bit widths by calculateBits().
    getFeatureCounter().clear();
}
/**
 * Scores every known type for one word and returns the scored candidates, sorted
 * by the natural ordering of {@code POS}.
 * NOTE(review): presumably that ordering puts the best candidate first; confirm against POS.compareTo.
 *
 * @param fs     the word form
 * @param params model parameters used for scoring
 * @param w1     word position passed to the feature extraction
 * @param is     the instance container
 * @param n      index of the sentence within {@code is}
 * @param pos    POS indices of the sentence passed to the feature extraction
 * @param li     feature-to-index mapping used when accumulating the score
 * @return one {@code POS} entry per type, sorted
 */
public ArrayList<POS> classify(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos, Long2IntInterface li) {
    final F2SF scorer = new F2SF(params.parameters);
    final long[] features = new long[_MAX];

    // Without lemmas a zero-filled array of sentence length is used instead.
    final int[] lemmas = options.noLemmas ? new int[is.length(n)] : is.plemmas[n];

    addFeatures(is, n, fs, w1, pos, is.forms[n], lemmas, features);

    final ArrayList<POS> ranked = new ArrayList<POS>(types.length);
    for (int t = 0; t < types.length; t++) {
        scorer.clear();
        scorer.add(features, li, t << s_type);
        ranked.add(new POS(t, scorer.score));
    }

    Collections.sort(ranked);
    return ranked;
}
pipe.initValues(); this.tag(is, instance);
/**
 * Tags a single word and returns an n-best list of part-of-speech tags.
 *
 * @param is            set of sentences
 * @param instanceIndex index of the sentence in question
 * @param word          word position forwarded to the classifier
 * @param wordForm      surface form of the word to be tagged
 * @return the list produced by {@code pipe.classify} for this word
 */
public ArrayList<POS> tag(InstancesTagger is,int instanceIndex, int word, String wordForm) {
    final short[] sentencePos = is.pposs[instanceIndex];
    final ArrayList<POS> candidates =
            pipe.classify(wordForm, params, word, is, instanceIndex, sentencePos, li);
    return candidates;
}
/**
 * Creates the instances for a file by delegating to the three-argument overload
 * with {@code -1} for both trailing arguments.
 * NOTE(review): presumably -1 means "no restriction"; confirm against the overload.
 *
 * @param file the file to read the instances from
 * @return the instances built by the three-argument overload
 */
public Instances createInstances(String file) {
    final Instances created = createInstances(file, -1, -1);
    return created;
}
/**
 * Creates a tagger and loads its model as described by the options.
 * A failure while reading the model is printed to standard error and not rethrown,
 * so the constructor still completes.
 *
 * @param options the options passed to {@code readModel}
 */
public Tagger (Options options) {
    try {
        // load the model
        readModel(options);
    } catch (Exception loadFailure) {
        loadFailure.printStackTrace();
    }
}

/**
 * Creates a tagger without loading a model.
 */
public Tagger() {
}
/** * Calculates the number of bits needed to encode a feature */ public void calculateBits() { int total=0; for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); m_featureBits.put(e.getKey(), bits); total+=bits; // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } // System.out.println("total number of needed bits "+total); }
/**
 * Registers every character of a word, one by one and in order, as a value of
 * the given feature class.
 *
 * @param type the feature class the characters are registered under
 * @param word the word whose characters are registered
 */
private void registerChars(String type, String word) {
    for (char ch : word.toCharArray()) {
        mf.register(type, Character.toString(ch));
    }
}
/**
 * Tags one sentence: wraps it in a fresh single-sentence instance container,
 * fills in its character data and runs the tagger over it.
 *
 * @param instance the sentence to tag
 * @return the same {@code instance} that was passed in
 */
public SentenceData09 tag(SentenceData09 instance){
    // Container sized for exactly one sentence, sharing the pipe's feature map.
    final InstancesTagger container = new InstancesTagger();
    container.init(1, pipe.mf);

    final CONLLReader09 reader = new CONLLReader09();
    reader.insert(container, instance);
    container.fillChars(instance, 0, ExtractorT2._CEND);

    tag(container, instance);
    return instance;
}