/**
 * Tags a single word and returns an n-best list of Part-of-Speech tags.
 *
 * @param is set of sentences
 * @param instanceIndex index of the sentence in question within {@code is}
 * @param word position of the word to be tagged inside that sentence
 * @param wordForm form of the word, handed to the classifier as its string argument
 * @return n-best list of Part-of-Speech tags as produced by {@code pipe.classify}
 */
public ArrayList<POS> tag(InstancesTagger is, int instanceIndex, int word, String wordForm) {
    // The tag array stored for this sentence is passed to the classifier as tag context.
    short[] sentenceTags = is.pposs[instanceIndex];
    ArrayList<POS> candidates =
            pipe.classify(wordForm, params, word, is, instanceIndex, sentenceTags, li);
    return candidates;
}
/**
 * Creates the instances for the given file by delegating to the three-argument
 * overload with both additional arguments set to -1.
 *
 * @param file name of the file to read
 * @return the instances built by the three-argument overload
 */
public Instances createInstances(String file) {
    // NOTE(review): -1 presumably means "no restriction" for both arguments — confirm against the overload.
    final int sentinel = -1;
    return createInstances(file, sentinel, sentinel);
}
/**
 * Tags every word of the sentence in two consecutive passes and stores the
 * resulting tag strings in {@code instance.ppos}.
 *
 * Position 0 is set to the ROOT tag. The second pass repeats the first one
 * with the tag ids of the first pass already present in the context array.
 *
 * @param is instances container; the sentence is always addressed at index 0
 * @param instance sentence whose {@code ppos} entries are overwritten
 */
private void tag(InstancesTagger is, SentenceData09 instance) {
    final int wordCount = instance.ppos.length;
    final short[] tagIds = new short[instance.gpos.length];
    final float[] scores = new float[instance.ppos.length];

    // Slot 0 always carries the ROOT tag, both as string and as numeric id.
    instance.ppos[0] = is2.io.CONLLReader09.ROOT_POS;
    tagIds[0] = (short) pipe.mf.getValue(ExtractorT2.POS, is2.io.CONLLReader09.ROOT_POS);

    for (int pass = 0; pass < 2; pass++) {
        for (int w = 1; w < wordCount; w++) {
            short predicted = (short) pipe.fillFeatureVectorsOne(
                    instance.forms[w], params, w, is, 0, tagIds, this.li, scores);
            tagIds[w] = predicted;
            instance.ppos[w] = pipe.types[predicted];
        }
    }
}
public void readModel(OptionsSuper options) { try{ pipe = new ExtractorT2(options, mf =new MFO()); _options=options; // load the model ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); zis.getNextEntry(); DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); pipe.mf.read(dis); pipe.initValues(); pipe.initFeatures(); params = new ParametersFloat(0); params.read(dis); li = new Long2Int(params.parameters.length); pipe.read(dis); dis.close(); pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorT2.POS)]; for(Entry<String,Integer> e : pipe.mf.getFeatureSet().get(ExtractorT2.POS).entrySet()) pipe.types[e.getValue()] = e.getKey(); DB.println("Loading data finished. "); } catch(Exception e) { e.printStackTrace(); } }
for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase()); for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); else lx= new Lexicon(options.lexicon,mf); initFeatures(); initValues(); if (num1 % 100 ==0) del = outValue(num1, del); instanceCount++; outValue(num1, del); System.out.println();
public static void main (String[] args) throws FileNotFoundException, Exception { long start = System.currentTimeMillis(); Options options = new Options(args); Tagger tagger = new Tagger(); if (options.train) { // depReader.normalizeOn=false; tagger.li = new Long2Int(options.hsize); tagger.pipe = new ExtractorT2 (options, tagger.mf= new MFO()); //tagger.pipe.li =tagger.li; InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile); tagger.params = new ParametersFloat(tagger.li.size()); tagger.train(options, tagger.pipe,tagger.params,is); tagger.writeModel(options, tagger.pipe, tagger.params); } if (options.test) { tagger.readModel(options); tagger.out(options,tagger.pipe, tagger.params); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); Evaluator.evaluateTagger(options.goldfile, options.outfile,options.format); } long end = System.currentTimeMillis(); System.out.println("used time "+((float)((end-start)/100)/10)); }
/**
 * Scores every known type for one word and returns all of them as a sorted list.
 *
 * @param fs form of the word, passed on to {@code addFeatures}
 * @param params parameters whose {@code parameters} array backs the scorer
 * @param w1 position of the word in the sentence
 * @param is instances container
 * @param n index of the sentence within {@code is}
 * @param pos tag array passed on to {@code addFeatures}
 * @param li mapping handed to the scorer together with the feature values
 * @return one {@code POS} per type, ordered by {@code Collections.sort}
 */
public ArrayList<POS> classify(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos, Long2IntInterface li) {
    final F2SF scorer = new F2SF(params.parameters);
    final long[] features = new long[_MAX];

    // With lemmas switched off a zero-filled array of sentence length stands in for them.
    final int[] lemmas = options.noLemmas ? new int[is.length(n)] : is.plemmas[n];

    addFeatures(is, n, fs, w1, pos, is.forms[n], lemmas, features);

    final int typeCount = types.length;
    final ArrayList<POS> ranked = new ArrayList<POS>(typeCount);
    for (int type = 0; type < typeCount; type++) {
        scorer.clear();
        // The type id is shifted by s_type before being combined with the features.
        scorer.add(features, li, type << s_type);
        ranked.add(new POS(type, scorer.score));
    }

    Collections.sort(ranked);
    return ranked;
}
public int fillFeatureVectorsOne(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos,Long2IntInterface li, float[] score) { float best = -1000; int bestType = -1; F2SF f = new F2SF(params.parameters); long vs[] = new long[_MAX]; int lemmas[]; if (options.noLemmas) lemmas = new int[is.length(n)]; else lemmas = is.plemmas[n]; addFeatures(is,n,fs,w1,pos,is.forms[n], lemmas, vs); //for(int t = 0; t < types.length; t++) { for(int t=0;t<types.length;t++) { int p = t<<s_type; f.clear(); for(int k=0;vs[k]!=Integer.MIN_VALUE;k++) if(vs[k]>0) f.add(li.l2i(vs[k]+p)); if (f.score > best) { bestType=t; score[w1]= best =f.score; } } return bestType; }
else lemmas = is.plemmas[n]; this.pipe.addFeatures(is,n,wds[is.forms[n][w]],w,is.gpos[n],is.forms[n], lemmas, vs);
/**
 * Tags a single word and returns the candidate Part-of-Speech tags as strings,
 * in the order delivered by {@code pipe.classify}.
 *
 * @param is set of sentences
 * @param instanceIndex index of the sentence in question within {@code is}
 * @param word position of the word to be tagged inside that sentence
 * @param wordForm form of the word, handed to the classifier as its string argument
 * @return list of tag strings; never {@code null}. A candidate whose id cannot
 *         be resolved is reported on the error stream and left out.
 */
public ArrayList<String> tagStrings(InstancesTagger is, int instanceIndex, int word, String wordForm) {
    ArrayList<POS> plist = pipe.classify(wordForm, params, word, is, instanceIndex, is.pposs[instanceIndex], li);
    String pos[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.POS));

    // The list has to exist before it is filled; starting from null made every
    // add() fail and the method return null.
    ArrayList<String> postags = new ArrayList<String>(plist.size());
    for (POS p : plist) {
        try {
            postags.add(pos[p.p]);
        } catch (Exception e) {
            // Best effort: skip a candidate that cannot be mapped back to a string.
            e.printStackTrace();
        }
    }
    return postags;
}