/**
 * Reads the documents contained in a file and calls {@code printLabelValue}
 * on each of them in turn.
 *
 * @param testFile The path of the file to read
 * @param readerAndWriter The reader handed to {@code makeObjectBankFromFile} to load the documents
 * @throws Exception Declared by this method; presumably propagated from the calls it makes
 */
public void printLabelInformation(String testFile, DocumentReaderAndWriter<IN> readerAndWriter) throws Exception {
  ObjectBank<List<IN>> docBank = makeObjectBankFromFile(testFile, readerAndWriter);
  for (List<IN> doc : docBank) {
    printLabelValue(doc);
  }
}
/** * Takes the file, reads it in, and prints out the factor table at each position. * * @param filename The path to the specified file */ public void printFactorTable(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFactorTableDocuments(docs); }
/** * Takes the file, reads it in, and prints out the likelihood of each possible * label at each point. This gives a simple way to examine the probability * distributions of the CRF. See {@code getCliqueTrees()} for more. * * @param filename The path to the specified file */ public void printFirstOrderProbs(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFirstOrderProbsDocuments(docs); }
/** * Want to make arbitrary probability queries? Then this is the method for * you. Given the filename, it reads it in and breaks it into documents, and * then makes a CRFCliqueTree for each document. you can then ask the clique * tree for marginals and conditional probabilities of almost anything you want. */ public List<CRFCliqueTree<String>> getCliqueTrees(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = false; List<CRFCliqueTree<String>> cts = new ArrayList<>(); ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); for (List<IN> doc : docs) { cts.add(getCliqueTree(doc)); } return cts; }
/**
 * Trains the segmenter from the raw text in {@code flags.trainFile}.
 * Gold segmentation markers are required.
 */
public void train() {
  // The training reader is always built with segmentation markers and tags enabled.
  final boolean withSegmentationMarkers = true;
  final boolean withTags = true;
  DocumentReaderAndWriter<CoreLabel> trainingReader =
      new ArabicDocumentReaderAndWriter(withSegmentationMarkers, withTags, hasDomainLabels, domain, noRewrites, tf);

  ObjectBank<List<CoreLabel>> trainingDocs = classifier.makeObjectBankFromFile(flags.trainFile, trainingReader);
  classifier.train(trainingDocs, trainingReader);
  log.info("Finished training.");
}
tf); ObjectBank<List<CoreLabel>> lines = classifier.makeObjectBankFromFile(flags.testFile, docReader);
/**
 * Command-line entry point. Builds a CRFClassifier from the command-line
 * properties, loads the documents named by {@code -trainFile}, and prints
 * their features to the file named by {@code -exportFeatures}.
 * Exits with status -1 if either option is missing.
 *
 * @param args Command-line arguments, converted to properties
 * @throws Exception Declared by this method; presumably propagated from the calls it makes
 */
public static void main(String[] args) throws Exception {
  StringUtils.logInvocationString(log, args);
  Properties options = StringUtils.argsToProperties(args);
  CRFClassifier<CoreLabel> classifier = new CRFClassifier<>(options);

  // Both the input and the output path are mandatory.
  String trainPath = classifier.flags.trainFile;
  if (trainPath == null) {
    log.info("Please provide input file using -trainFile");
    System.exit(-1);
  }
  String exportPath = classifier.flags.exportFeatures;
  if (exportPath == null) {
    log.info("Please provide output file using -exportFeatures");
    System.exit(-1);
  }

  CRFFeatureExporter<CoreLabel> exporter = new CRFFeatureExporter<>(classifier);
  Collection<List<CoreLabel>> documents =
      classifier.makeObjectBankFromFile(trainPath, classifier.makeReaderAndWriter());
  classifier.makeAnswerArraysAndTagIndex(documents);
  exporter.printFeatures(exportPath, documents);
}
CRFClassifierEvaluator<IN> crfEvaluator = new CRFClassifierEvaluator<>("Test set (" + flags.testFile + ")", this); ObjectBank<List<IN>> testObjBank = makeObjectBankFromFile(flags.testFile, readerAndWriter); List<List<IN>> testDocs = new ArrayList<>(testObjBank); List<Triple<int[][][], int[], double[][][]>> testDataAndLabels = documentsToDataAndLabelsList(testDocs); for (String testFile : testFiles) { CRFClassifierEvaluator<IN> crfEvaluator = new CRFClassifierEvaluator<>("Test set (" + testFile + ')', this); ObjectBank<List<IN>> testObjBank = makeObjectBankFromFile(testFile, readerAndWriter); List<Triple<int[][][], int[], double[][][]>> testDataAndLabels = documentsToDataAndLabelsList(testObjBank); crfEvaluator.setTestData(testObjBank, testDataAndLabels);
/**
 * Loads every document from the given file and passes each one to
 * {@code printLabelValue}.
 *
 * @param testFile The path of the file to read
 * @param readerAndWriter The reader handed to {@code makeObjectBankFromFile} to load the documents
 * @throws Exception Declared by this method; presumably propagated from the calls it makes
 */
public void printLabelInformation(String testFile, DocumentReaderAndWriter<IN> readerAndWriter) throws Exception {
  ObjectBank<List<IN>> loadedDocs = makeObjectBankFromFile(testFile, readerAndWriter);
  for (List<IN> current : loadedDocs) {
    printLabelValue(current);
  }
}
/** * Takes the file, reads it in, and prints out the factor table at each position. * * @param filename * The path to the specified file */ public void printFactorTable(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFactorTableDocuments(docs); }
/**
 * Iterates over the documents read from the given file, calling
 * {@code printLabelValue} once per document.
 *
 * @param testFile The path of the file to read
 * @param readerAndWriter The reader handed to {@code makeObjectBankFromFile} to load the documents
 * @throws Exception Declared by this method; presumably propagated from the calls it makes
 */
public void printLabelInformation(String testFile, DocumentReaderAndWriter<IN> readerAndWriter) throws Exception {
  ObjectBank<List<IN>> bank = makeObjectBankFromFile(testFile, readerAndWriter);
  for (List<IN> entry : bank) {
    printLabelValue(entry);
  }
}
/** * Takes the file, reads it in, and prints out the factor table at each position. * * @param filename The path to the specified file */ public void printFactorTable(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFactorTableDocuments(docs); }
/**
 * Reads the documents contained in a file and calls {@code printLabelValue}
 * on each of them in turn.
 *
 * @param testFile The path of the file to read
 * @param readerAndWriter The reader handed to {@code makeObjectBankFromFile} to load the
 *          documents; typed with {@code IN} so the call is checked by the compiler
 *          instead of going through a raw type
 * @throws Exception Declared by this method; presumably propagated from the calls it makes
 */
public void printLabelInformation(String testFile, DocumentReaderAndWriter<IN> readerAndWriter) throws Exception {
  ObjectBank<List<IN>> documents = makeObjectBankFromFile(testFile, readerAndWriter);
  for (List<IN> document : documents) {
    printLabelValue(document);
  }
}
/** * Takes the file, reads it in, and prints out the likelihood of each possible * label at each point. This gives a simple way to examine the probability * distributions of the CRF. See {@code getCliqueTrees()} for more. * * @param filename The path to the specified file */ public void printFirstOrderProbs(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFirstOrderProbsDocuments(docs); }
/** * Takes the file, reads it in, and prints out the likelihood of * each possible label at each point. This gives a simple way to examine * the probability distributions of the CRF. See * <code>getCliqueTrees()</code> for more. * * @param filename The path to the specified file */ public void printFirstOrderProbs(String filename, DocumentReaderAndWriter readerAndWriter) { // only for the OCR data does this matter flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFirstOrderProbsDocuments(docs); }
/** * Takes the file, reads it in, and prints out the likelihood of each possible * label at each point. This gives a simple way to examine the probability * distributions of the CRF. See <code>getCliqueTrees()</code> for more. * * @param filename * The path to the specified file */ public void printFirstOrderProbs(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter flags.ocrTrain = false; ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); printFirstOrderProbsDocuments(docs); }
/** * Want to make arbitrary probability queries? Then this is the method for you. * Given the filename, it reads it in and breaks it into documents, and then makes * a CRFCliqueTree for each document. you can then ask the clique tree for marginals * and conditional probabilities of almost anything you want. */ public List<CRFCliqueTree> getCliqueTrees(String filename, DocumentReaderAndWriter readerAndWriter) { // only for the OCR data does this matter flags.ocrTrain = false; List<CRFCliqueTree> cts = new ArrayList<CRFCliqueTree>(); ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); for (List<IN> doc : docs) { cts.add(getCliqueTree(doc)); } return cts; }
/** * Want to make arbitrary probability queries? Then this is the method for * you. Given the filename, it reads it in and breaks it into documents, and * then makes a CRFCliqueTree for each document. you can then ask the clique * tree for marginals and conditional probabilities of almost anything you want. */ public List<CRFCliqueTree<String>> getCliqueTrees(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = false; List<CRFCliqueTree<String>> cts = new ArrayList<>(); ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); for (List<IN> doc : docs) { cts.add(getCliqueTree(doc)); } return cts; }
/** * Want to make arbitrary probability queries? Then this is the method for * you. Given the filename, it reads it in and breaks it into documents, and * then makes a CRFCliqueTree for each document. you can then ask the clique * tree for marginals and conditional probabilities of almost anything you * want. */ public List<CRFCliqueTree<String>> getCliqueTrees(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter flags.ocrTrain = false; List<CRFCliqueTree<String>> cts = new ArrayList<CRFCliqueTree<String>>(); ObjectBank<List<IN>> docs = makeObjectBankFromFile(filename, readerAndWriter); for (List<IN> doc : docs) { cts.add(getCliqueTree(doc)); } return cts; }
/**
 * Trains the segmenter from the raw text in {@code flags.trainFile} and
 * reports completion on standard error.
 * Gold segmentation markers are required.
 */
public void train() {
  // The training reader is always built with segmentation markers and tags enabled.
  final boolean withSegmentationMarkers = true;
  final boolean withTags = true;
  DocumentReaderAndWriter<CoreLabel> trainingReader =
      new ArabicDocumentReaderAndWriter(withSegmentationMarkers, withTags, hasDomainLabels, domain, noRewrites, tf);

  ObjectBank<List<CoreLabel>> trainingDocs = classifier.makeObjectBankFromFile(flags.trainFile, trainingReader);
  classifier.train(trainingDocs, trainingReader);
  System.err.println("Finished training.");
}