/**
 * Creates a new SerialPipes from a contiguous slice of this object's pipes.
 * The bounds are handed unchanged to {@code pipes.subList(start, end)}, so
 * that call decides how they are validated.
 *
 * @param start first bound passed to {@code subList}
 * @param end second bound passed to {@code subList}
 * @return a new SerialPipes constructed from the selected sub-list
 */
public SerialPipes newSerialPipesFromRange (int start, int end)
{
    SerialPipes ranged = new SerialPipes (pipes.subList (start, end));
    return ranged;
}
static CRFExtractor hackCrfExtor (CRF crf) { Pipe[] newPipes = new Pipe [3]; SerialPipes pipes = (SerialPipes) crf.getInputPipe (); for (int i = 0; i < 3; i++) { Pipe p0 = pipes.getPipe (0); //pipes.removePipe (0); TODO Fix me //p0.setParent (null); newPipes[i] = p0; } Pipe tokPipe = new SerialPipes (newPipes); CRFExtractor extor = new CRFExtractor (crf, (Pipe)tokPipe); return extor; }
/**
 * Creates a SerialPipes from the given pipes.
 * The collection is copied into a fresh ArrayList, after which
 * {@code resolveAlphabets()} is invoked.
 *
 * @param pipeList the pipes to copy, in the collection's iteration order
 */
public SerialPipes (Collection<Pipe> pipeList)
{
    ArrayList<Pipe> copied = new ArrayList<Pipe> (pipeList);
    pipes = copied;
    resolveAlphabets ();
}
/**
 * Applies the in-production flag to a pipe.
 * An AddClassifierTokenPredictions receives the flag directly; a SerialPipes
 * is walked recursively so that every pipe it contains is visited. Any other
 * kind of pipe is left untouched.
 *
 * @param p the pipe (possibly a SerialPipes) to update
 * @param value the flag value to set
 */
public static void setInProduction(Pipe p, boolean value)
{
    if (p instanceof AddClassifierTokenPredictions) {
        AddClassifierTokenPredictions predictor = (AddClassifierTokenPredictions) p;
        predictor.setInProduction(value);
        return;
    }
    if (!(p instanceof SerialPipes)) {
        return;
    }

    SerialPipes serial = (SerialPipes) p;
    int idx = 0;
    while (idx < serial.size()) {
        setInProduction(serial.getPipe(idx), value);
        idx++;
    }
}
// Excerpt: declares several SerialPipes groups (conllLexiconsPipe, googleLexiconsPipe,
// fixedLexiconsPipe, idfLexiconsPipe, spellingFeaturesPipe) from lexicon-membership and
// regex-match pipes, starts the main SerialPipes p, then prints the entries of
// p.getTargetAlphabet() and the size of p.getDataAlphabet().
// NOTE(review): the array initializers are not closed in this excerpt, so pipe entries
// appear to have been elided; confirm against the full source before editing.
Pipe conllLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership (new File(conlllexdir + "conll/CONLLTWOPER")), new TrieLexiconMembership (new File(conlllexdir + "conll/CONLLTWOLOC")), Pipe googleLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership (new File(conlllexdir + "googlesets/ORGSOCCER")), new TrieLexiconMembership (new File(conlllexdir + "googlesets/ORGGOVT")), Pipe fixedLexiconsPipe = new SerialPipes (new Pipe[] { new LexiconMembership ("FIRSTHIGHEST", new File(conlllexdir + "personname/ssdi.prfirsthighest"), true), new LexiconMembership ("FIRSTHIGH", new File(conlllexdir + "personname/ssdi.prfirsthigh"), true), Pipe idfLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership ("IDF_DES", new File(idflexdir + "designator.data"), true), new TrieLexiconMembership ("IDF_FIR", new File(idflexdir + "firstnames.data"), true), Pipe spellingFeaturesPipe = new SerialPipes (new Pipe[] { new RegexMatches ("INITCAP", Pattern.compile (CAPS+".*")), new RegexMatches ("CAPITALIZED", Pattern.compile (CAPS+LOW+"*")), SerialPipes p = new SerialPipes (new Pipe[] { new EnronMessage2TokenSequence (), Alphabet targets = p.getTargetAlphabet(); System.out.print ("State labels:"); for (int i = 0; i < targets.size(); i++) System.out.print (" " + targets.lookupObject(i)); System.out.println (""); System.out.println ("Number of features = "+p.getDataAlphabet().size());
/**
 * Command-line driver: for every file named on the command line, runs it
 * through an Input2CharSequence / CharSequence2TokenSequence pipeline and
 * prints a separator, the argument, and the resulting token sequence.
 * Any exception is printed, followed by its stack trace.
 *
 * @param args paths of the files to tokenize
 */
public static void main (String[] args)
{
    try {
        for (String fileName : args) {
            Instance carrier = new Instance (new File(fileName), null, null, null);

            Pipe[] stages = new Pipe[] {
                new Input2CharSequence (),
                new CharSequence2TokenSequence(new CharSequenceLexer())
            };
            SerialPipes pipeline = new SerialPipes (stages);

            carrier = pipeline.newIteratorFrom (new SingleInstanceIterator(carrier)).next();
            TokenSequence tokens = (TokenSequence) carrier.getData();

            System.out.println ("===");
            System.out.println (fileName);
            System.out.println (tokens.toString());
        }
    } catch (Exception failure) {
        System.out.println (failure);
        failure.printStackTrace();
    }
}
/** * Transfer some Pipes from the feature pipe to the tokenization pipe. * The feature pipe must be a SerialPipes. This will destructively modify the CRF object of the extractor. * This is useful if you have a CRF hat has been trained from a single pipe, which you need to split up * int feature and tokenization pipes */ public void slicePipes (int num) { Pipe fpipe = getFeaturePipe (); if (!(fpipe instanceof SerialPipes)) throw new IllegalArgumentException ("slicePipes: FeaturePipe must be a SerialPipes."); SerialPipes sp = (SerialPipes) fpipe; ArrayList pipes = new ArrayList (); for (int i = 0; i < num; i++) { pipes.add (sp.getPipe (0)); //sp.removePipe (0); TODO Fix this } //setTokenizationPipe (sp); TODO Fix this throw new UnsupportedOperationException ("Not yet implemented..."); }
// Excerpt: sets placelexdir, declares several SerialPipes groups (conllLexiconsPipe,
// googleLexiconsPipe, fixedLexiconsPipe, idfLexiconsPipe) from lexicon-membership pipes,
// starts the main SerialPipes p, then prints the entries of p.getTargetAlphabet() and the
// size of p.getDataAlphabet().
// NOTE(review): the array initializers are not closed in this excerpt, so pipe entries
// appear to have been elided; confirm against the full source before editing.
String placelexdir = "/usr/col/tmp1/weili/Resource/places"; Pipe conllLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership (new File(conlllexdir + "conll/CONLLTWOPER")), new TrieLexiconMembership (new File(conlllexdir + "conll/CONLLTWOLOC")), Pipe googleLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership (new File(conlllexdir + "googlesets/ORGSOCCER")), new TrieLexiconMembership (new File(conlllexdir + "googlesets/ORGGOVT")), Pipe fixedLexiconsPipe = new SerialPipes (new Pipe[] { new LexiconMembership ("FIRSTHIGHEST", new File(conlllexdir + "personname/ssdi.prfirsthighest"), true), new LexiconMembership ("FIRSTHIGH", new File(conlllexdir + "personname/ssdi.prfirsthigh"), true), Pipe idfLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership ("IDF_DES", new File(idflexdir + "designator.data"), true), new TrieLexiconMembership ("IDF_FIR", new File(idflexdir + "firstnames.data"), true), SerialPipes p = new SerialPipes (new Pipe[] { new EnronMessage2TokenSequence (), Alphabet targets = p.getTargetAlphabet(); System.out.print ("State labels:"); for (int i = 0; i < targets.size(); i++) System.out.print (" " + targets.lookupObject(i)); System.out.println (""); System.out.println ("Number of features = "+p.getDataAlphabet().size());
/**
 * Command-line driver: for every file named on the command line, runs it
 * through an Input2CharSequence / CharSequence2TokenSequence pipeline and
 * prints a separator, the argument, and the resulting token sequence.
 * Any exception is printed, followed by its stack trace.
 *
 * @param args paths of the files to tokenize
 */
public static void main (String[] args)
{
    try {
        for (String fileName : args) {
            Instance carrier = new Instance (new File(fileName), null, null, null);

            Pipe[] stages = new Pipe[] {
                new Input2CharSequence (),
                new CharSequence2TokenSequence(new CharSequenceLexer())
            };
            SerialPipes pipeline = new SerialPipes (stages);

            carrier = pipeline.newIteratorFrom (new SingleInstanceIterator(carrier)).next();
            TokenSequence tokens = (TokenSequence) carrier.getData();

            System.out.println ("===");
            System.out.println (fileName);
            System.out.println (tokens.toString());
        }
    } catch (Exception failure) {
        System.out.println (failure);
        failure.printStackTrace();
    }
}
/**
 * Applies the in-production flag to a pipe.
 * An AddClassifierTokenPredictions receives the flag directly; a SerialPipes
 * is walked recursively so that every pipe it contains is visited. Any other
 * kind of pipe is left untouched.
 *
 * @param p the pipe (possibly a SerialPipes) to update
 * @param value the flag value to set
 */
public static void setInProduction(Pipe p, boolean value)
{
    if (p instanceof AddClassifierTokenPredictions) {
        AddClassifierTokenPredictions predictor = (AddClassifierTokenPredictions) p;
        predictor.setInProduction(value);
        return;
    }
    if (!(p instanceof SerialPipes)) {
        return;
    }

    SerialPipes serial = (SerialPipes) p;
    int idx = 0;
    while (idx < serial.size()) {
        setInProduction(serial.getPipe(idx), value);
        idx++;
    }
}
/** * Transfer some Pipes from the feature pipe to the tokenization pipe. * The feature pipe must be a SerialPipes. This will destructively modify the CRF object of the extractor. * This is useful if you have a CRF hat has been trained from a single pipe, which you need to split up * int feature and tokenization pipes */ public void slicePipes (int num) { Pipe fpipe = getFeaturePipe (); if (!(fpipe instanceof SerialPipes)) throw new IllegalArgumentException ("slicePipes: FeaturePipe must be a SerialPipes."); SerialPipes sp = (SerialPipes) fpipe; ArrayList pipes = new ArrayList (); for (int i = 0; i < num; i++) { pipes.add (sp.getPipe (0)); //sp.removePipe (0); TODO Fix this } //setTokenizationPipe (sp); TODO Fix this throw new UnsupportedOperationException ("Not yet implemented..."); }
/**
 * Creates a new SerialPipes from a contiguous slice of this object's pipes.
 * The bounds are handed unchanged to {@code pipes.subList(start, end)}, so
 * that call decides how they are validated.
 *
 * @param start first bound passed to {@code subList}
 * @param end second bound passed to {@code subList}
 * @return a new SerialPipes constructed from the selected sub-list
 */
public SerialPipes newSerialPipesFromRange (int start, int end)
{
    SerialPipes ranged = new SerialPipes (pipes.subList (start, end));
    return ranged;
}
// Excerpt: sets placelexdir, declares several SerialPipes groups (conllLexiconsPipe,
// googleLexiconsPipe, fixedLexiconsPipe, idfLexiconsPipe) from lexicon-membership pipes,
// starts the main SerialPipes p, then prints the entries of p.getTargetAlphabet() and the
// size of p.getDataAlphabet().
// NOTE(review): the array initializers are not closed in this excerpt, so pipe entries
// appear to have been elided; confirm against the full source before editing.
String placelexdir = "/usr/col/tmp1/weili/Resource/places"; Pipe conllLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership (new File(conlllexdir + "conll/CONLLTWOPER")), new TrieLexiconMembership (new File(conlllexdir + "conll/CONLLTWOLOC")), Pipe googleLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership (new File(conlllexdir + "googlesets/ORGSOCCER")), new TrieLexiconMembership (new File(conlllexdir + "googlesets/ORGGOVT")), Pipe fixedLexiconsPipe = new SerialPipes (new Pipe[] { new LexiconMembership ("FIRSTHIGHEST", new File(conlllexdir + "personname/ssdi.prfirsthighest"), true), new LexiconMembership ("FIRSTHIGH", new File(conlllexdir + "personname/ssdi.prfirsthigh"), true), Pipe idfLexiconsPipe = new SerialPipes (new Pipe[] { new TrieLexiconMembership ("IDF_DES", new File(idflexdir + "designator.data"), true), new TrieLexiconMembership ("IDF_FIR", new File(idflexdir + "firstnames.data"), true), SerialPipes p = new SerialPipes (new Pipe[] { new EnronMessage2TokenSequence (), Alphabet targets = p.getTargetAlphabet(); System.out.print ("State labels:"); for (int i = 0; i < targets.size(); i++) System.out.print (" " + targets.lookupObject(i)); System.out.println (""); System.out.println ("Number of features = "+p.getDataAlphabet().size());
/**
 * Command-line driver: for every file named on the command line, runs it
 * through an Input2CharSequence / CharSequence2TokenSequence pipeline and
 * prints a separator, the argument, and the resulting token sequence.
 * Any exception is printed, followed by its stack trace.
 *
 * @param args paths of the files to tokenize
 */
public static void main (String[] args)
{
    try {
        for (String fileName : args) {
            Instance carrier = new Instance (new File(fileName), null, null, null);

            Pipe[] stages = new Pipe[] {
                new Input2CharSequence (),
                new CharSequence2TokenSequence(new CharSequenceLexer())
            };
            SerialPipes pipeline = new SerialPipes (stages);

            carrier = pipeline.newIteratorFrom (new SingleInstanceIterator(carrier)).next();
            TokenSequence tokens = (TokenSequence) carrier.getData();

            System.out.println ("===");
            System.out.println (fileName);
            System.out.println (tokens.toString());
        }
    } catch (Exception failure) {
        System.out.println (failure);
        failure.printStackTrace();
    }
}
static CRFExtractor hackCrfExtor (CRF crf) { Pipe[] newPipes = new Pipe [3]; SerialPipes pipes = (SerialPipes) crf.getInputPipe (); for (int i = 0; i < 3; i++) { Pipe p0 = pipes.getPipe (0); //pipes.removePipe (0); TODO Fix me //p0.setParent (null); newPipes[i] = p0; } Pipe tokPipe = new SerialPipes (newPipes); CRFExtractor extor = new CRFExtractor (crf, (Pipe)tokPipe); return extor; }
/**
 * Applies the in-production flag to a pipe.
 * An AddClassifierTokenPredictions receives the flag directly; a SerialPipes
 * is walked recursively so that every pipe it contains is visited. Any other
 * kind of pipe is left untouched.
 *
 * @param p the pipe (possibly a SerialPipes) to update
 * @param value the flag value to set
 */
public static void setInProduction(Pipe p, boolean value)
{
    if (p instanceof AddClassifierTokenPredictions) {
        AddClassifierTokenPredictions predictor = (AddClassifierTokenPredictions) p;
        predictor.setInProduction(value);
        return;
    }
    if (!(p instanceof SerialPipes)) {
        return;
    }

    SerialPipes serial = (SerialPipes) p;
    int idx = 0;
    while (idx < serial.size()) {
        setInProduction(serial.getPipe(idx), value);
        idx++;
    }
}
/**
 * Creates a SerialPipes from the given pipes.
 * The collection is copied into a fresh ArrayList, after which
 * {@code resolveAlphabets()} is invoked.
 *
 * @param pipeList the pipes to copy, in the collection's iteration order
 */
public SerialPipes (Collection<Pipe> pipeList)
{
    ArrayList<Pipe> copied = new ArrayList<Pipe> (pipeList);
    pipes = copied;
    resolveAlphabets ();
}
/** * Transfer some Pipes from the feature pipe to the tokenization pipe. * The feature pipe must be a SerialPipes. This will destructively modify the CRF object of the extractor. * This is useful if you have a CRF hat has been trained from a single pipe, which you need to split up * int feature and tokenization pipes */ public void slicePipes (int num) { Pipe fpipe = getFeaturePipe (); if (!(fpipe instanceof SerialPipes)) throw new IllegalArgumentException ("slicePipes: FeaturePipe must be a SerialPipes."); SerialPipes sp = (SerialPipes) fpipe; ArrayList pipes = new ArrayList (); for (int i = 0; i < num; i++) { pipes.add (sp.getPipe (0)); //sp.removePipe (0); TODO Fix this } //setTokenizationPipe (sp); TODO Fix this throw new UnsupportedOperationException ("Not yet implemented..."); }
/**
 * Creates a new SerialPipes from a contiguous slice of this object's pipes.
 * The bounds are handed unchanged to {@code pipes.subList(start, end)}, so
 * that call decides how they are validated.
 *
 * @param start first bound passed to {@code subList}
 * @param end second bound passed to {@code subList}
 * @return a new SerialPipes constructed from the selected sub-list
 */
public SerialPipes newSerialPipesFromRange (int start, int end)
{
    SerialPipes ranged = new SerialPipes (pipes.subList (start, end));
    return ranged;
}
/**
 * Creates a SerialPipes from the given pipes.
 * The collection is copied into a fresh ArrayList, after which
 * {@code resolveAlphabets()} is invoked.
 *
 * @param pipeList the pipes to copy, in the collection's iteration order
 */
public SerialPipes (Collection<Pipe> pipeList)
{
    ArrayList<Pipe> copied = new ArrayList<Pipe> (pipeList);
    pipes = copied;
    resolveAlphabets ();
}