/** * The main entry point of the code. */ public static void main(String[] args) throws IOException { forceTrack("Processing treebanks"); List<Pair<CoreMap, Collection<Pair<Span, Span>>>> trainingData = new ArrayList<>(); trainingData.addAll(processDirectory("WSJ", new File("/home/gabor/lib/data/penn_treebank/wsj"))); trainingData.addAll(processDirectory("Brown", new File("/home/gabor/lib/data/penn_treebank/brown"))); endTrack("Processing treebanks"); forceTrack("Training"); log("dataset size: " + trainingData.size()); ClauseSplitter.train( trainingData.stream(), new File("/home/gabor/tmp/clauseSearcher.ser.gz"), new File("/home/gabor/tmp/clauseSearcherData.tab.gz")); endTrack("Training"); // Execution.fillOptions(CreateClauseDataset.class, args); // // new CreateClauseDataset().runAndExit(in, System.err, code -> code); } }
startTrack( "Threads (" + title + ')'); for (Runnable toRun : runnables) { toRun.run(); } endTrack( "Threads (" + title + ')'); return;
/**
 * Classifies every example, scores each prediction against its gold label, and optionally
 * writes one line per example to {@code predictOut}.
 *
 * <p>Examples are classified on a parallel stream. Each output line is
 * {@code <predicted label> TAB <confidence formatted as 0.0000>} and lines are emitted in
 * encounter order. A one-line progress summary is logged after every 1000th example.
 *
 * @param examples   pairs of (input, gold label) to evaluate; consumed by this call
 * @param predictOut if present, receives one line per example; if empty, nothing is printed
 * @return the {@code Accuracy} accumulator after all examples have been scored
 */
default Accuracy computeAccuracy(Stream<Pair<KBPInput, String>> examples, Optional<PrintStream> predictOut) {
  forceTrack("Accuracy");
  Accuracy accuracy = new Accuracy();
  AtomicInteger testI = new AtomicInteger(0);
  DecimalFormat confidenceFormat = new DecimalFormat("0.0000");
  forceTrack("Featurizing");
  examples.parallel().map(example -> {
    // Classification is the expensive step and stays outside the lock.
    Pair<String, Double> predicted = this.classify(example.first);
    String confidence;
    String progress = null;
    // All state shared between worker threads is touched under one lock:
    //  - the accuracy accumulator (already guarded by the original code),
    //  - the DecimalFormat, which the JDK documents as not safe for concurrent use,
    //  - the progress snapshot, so the logged count and summary describe the same moment.
    synchronized (accuracy) {
      accuracy.predict(Collections.singleton(predicted.first), Collections.singleton(example.second));
      // Keep the incremented value: re-reading the counter later could report a different number.
      int seen = testI.incrementAndGet();
      if (seen % 1000 == 0) {
        progress = "[" + seen + "] " + accuracy.toOneLineString();
      }
      confidence = confidenceFormat.format(predicted.second);
    }
    if (progress != null) {
      log(KBPRelationExtractor.class, progress);
    }
    return predicted.first + "\t" + confidence;
  })
  // The terminal operation must run even when there is no output sink: it drives the pipeline.
  .forEachOrdered(line -> predictOut.ifPresent(out -> out.println(line)));
  endTrack("Featurizing");
  log(accuracy.toString());
  endTrack("Accuracy");
  return accuracy;
}
List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE); log.info("Read " + testExamples.size() + " examples"); endTrack("Test data"); log.info("Read " + trainExamples.size() + " examples"); log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION); endTrack("Training data"); endTrack("Creating dataset");
startTrack( "Threads (" + title + ")" ); for (Runnable toRun : runnables) { toRun.run(); } endTrack( "Threads (" + title + ")" ); return;
public static void exec(Runnable toRun, Properties options, boolean exit) { //--Init //(bootstrap) Map<String, Field> bootstrapMap = fillOptionsImpl(null, BOOTSTRAP_CLASSES, options, false); //bootstrap for (String key : bootstrapMap.keySet()) { options.remove(key); } startTrack("init"); //(fill options) Class<?>[] visibleClasses = optionClasses; if (visibleClasses == null) visibleClasses = getVisibleClasses(); //get classes fillOptionsImpl(null, visibleClasses, options);//fill endTrack("init"); // -- Setup Logging StanfordRedwoodConfiguration.apply(options); //--Run Program int exitCode = 0; startTrack("main"); try { toRun.run(); } catch (Throwable t) { log(FORCE, t); exitCode = 1; } endTracksTo("main"); // end main if (exit) { System.exit(exitCode); } }
startTrack( "Threads (" + title + ")" ); for (Runnable toRun : runnables) { toRun.run(); } endTrack( "Threads (" + title + ")" ); return;
startTrack( "Threads (" + title + ")" ); for (Runnable toRun : runnables) { toRun.run(); } endTrack( "Threads (" + title + ")" ); return;
/**
 * Runs the classifier over {@code examples}, tallies predictions against gold labels, and
 * optionally prints one prediction line per example.
 *
 * <p>The stream is processed in parallel. Each printed line has the form
 * {@code <predicted label> TAB <confidence formatted as 0.0000>}; lines appear in encounter
 * order. Progress is logged once per 1000 examples processed.
 *
 * @param examples   (input, gold label) pairs to score; the stream is consumed
 * @param predictOut optional sink for the per-example prediction lines
 * @return the populated {@code Accuracy} accumulator
 */
default Accuracy computeAccuracy(Stream<Pair<KBPInput, String>> examples, Optional<PrintStream> predictOut) {
  forceTrack("Accuracy");
  Accuracy accuracy = new Accuracy();
  AtomicInteger testI = new AtomicInteger(0);
  DecimalFormat confidenceFormat = new DecimalFormat("0.0000");
  forceTrack("Featurizing");
  examples.parallel().map(example -> {
    // Classify without holding the lock; only the bookkeeping below is serialized.
    Pair<String, Double> predicted = this.classify(example.first);
    String confidence;
    String progress = null;
    // One lock covers everything the worker threads share: the accumulator, the
    // DecimalFormat (not safe for concurrent use per the JDK docs), and the progress
    // snapshot, so the logged count matches the summary printed next to it.
    synchronized (accuracy) {
      accuracy.predict(Collections.singleton(predicted.first), Collections.singleton(example.second));
      // Use the value returned by the increment; a second read could observe a later count.
      int seen = testI.incrementAndGet();
      if (seen % 1000 == 0) {
        progress = "[" + seen + "] " + accuracy.toOneLineString();
      }
      confidence = confidenceFormat.format(predicted.second);
    }
    if (progress != null) {
      log(KBPRelationExtractor.class, progress);
    }
    return predicted.first + "\t" + confidence;
  })
  // Always run the terminal operation, since it is what pulls examples through the pipeline.
  .forEachOrdered(line -> predictOut.ifPresent(out -> out.println(line)));
  endTrack("Featurizing");
  log(accuracy.toString());
  endTrack("Accuracy");
  return accuracy;
}
/**
 * Calls {@code endTracksUntil(check)} and then {@code endTrack(check)}, in that order.
 *
 * <p>NOTE(review): presumably {@code endTracksUntil} closes the tracks nested inside
 * {@code check} and this method then closes {@code check} itself -- confirm against
 * {@code endTracksUntil}.
 *
 * @param check the track title passed unchanged to both calls
 */
public static void endTracksTo(String check) { endTracksUntil(check); endTrack(check); }
/**
 * Calls {@code endTracksUntil(check)} and then {@code endTrack(check)}, in that order.
 *
 * <p>NOTE(review): presumably {@code endTracksUntil} closes the tracks nested inside
 * {@code check} and this method then closes {@code check} itself -- confirm against
 * {@code endTracksUntil}.
 *
 * @param check the track title passed unchanged to both calls
 */
public static void endTracksTo(String check) { endTracksUntil(check); endTrack(check); }
/**
 * Calls {@code endTracksUntil(check)} and then {@code endTrack(check)}, in that order.
 *
 * <p>NOTE(review): presumably {@code endTracksUntil} closes the tracks nested inside
 * {@code check} and this method then closes {@code check} itself -- confirm against
 * {@code endTracksUntil}.
 *
 * @param check the track title passed unchanged to both calls
 */
public static void endTracksTo(String check) { endTracksUntil(check); endTrack(check); }
endTrack("Training inference"); endTrack("Training"); if (modelPath.isPresent()) { Pair<Classifier<ClauseClassifierLabel,String>, Featurizer> toSave = Pair.makePair(fullClassifier, featurizer); dataset.randomize(42L); Util.dumpAccuracy(fullClassifier, dataset); endTrack("Training accuracy"); Pair<GeneralDataset<ClauseClassifierLabel, String>, GeneralDataset<ClauseClassifierLabel, String>> foldData = dataset.splitOutFold(fold, numFolds); Classifier<ClauseClassifierLabel, String> classifier = factory.trainClassifier(foldData.first); endTrack("Training"); forceTrack("Test"); Util.dumpAccuracy(classifier, foldData.second); endTrack("Test"); endTrack("Fold " + (fold + 1)); endTrack(numFolds + " fold cross-validation");
endTrack("Processing " + name); return trainingData;
List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE); log.info("Read " + testExamples.size() + " examples"); endTrack("Test data"); log.info("Read " + trainExamples.size() + " examples"); log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION); endTrack("Training data"); endTrack("Creating dataset");
endTrack("main");
/**
 * Calls {@code endTracksUntil(check)} and then {@code endTrack(check)}, in that order.
 *
 * <p>NOTE(review): presumably {@code endTracksUntil} closes the tracks nested inside
 * {@code check} and this method then closes {@code check} itself -- confirm against
 * {@code endTracksUntil}.
 *
 * @param check the track title passed unchanged to both calls
 */
public static void endTracksTo(String check) { endTracksUntil(check); endTrack(check); }