/**
 * Trains this classifier on a collection of sequences, using the reader and
 * writer returned by {@link #defaultReaderAndWriter()}.
 * The collection may be (and commonly is) an ObjectBank.
 *
 * @param docs An ObjectBank or any other collection of sequences of IN
 */
public void train(Collection<List<IN>> docs) {
  DocumentReaderAndWriter<IN> readerAndWriter = defaultReaderAndWriter();
  train(docs, readerAndWriter);
}
/**
 * Loads a test file, runs the classifier on it, and prints the answers to
 * stdout (with timing to stderr). The file is read with the reader and writer
 * returned by {@link #defaultReaderAndWriter()}; with the default settings
 * this is expected to be a tab-separated columns test file.
 *
 * @param testFile The file to test on.
 * @param outputScores Whether to calculate and then log performance scores (P/R/F1)
 * @return A Triple of P/R/F1 if outputScores is true, else null
 * @throws IOException Propagated from the overload this method delegates to
 */
public Triple<Double,Double,Double> classifyAndWriteAnswers(String testFile, boolean outputScores)
    throws IOException {
  DocumentReaderAndWriter<IN> readerAndWriter = defaultReaderAndWriter();
  return classifyAndWriteAnswers(testFile, readerAndWriter, outputScores);
}
/**
 * Trains the classifier from the training data named in flags. The first of
 * the following that is non-null wins: trainFiles (combined with
 * baseTrainDir), then trainFileList (a comma-separated list of file names),
 * and finally trainFile, which is used as the fallback without a null check.
 * Reading always goes through {@link #defaultReaderAndWriter()}.
 */
public void train() {
  if (flags.trainFiles != null) {
    train(flags.baseTrainDir, flags.trainFiles, defaultReaderAndWriter());
    return;
  }

  if (flags.trainFileList == null) {
    // Neither trainFiles nor trainFileList is set: fall back to the single train file.
    train(flags.trainFile, defaultReaderAndWriter());
    return;
  }

  String[] trainFileNames = flags.trainFileList.split(",");
  train(trainFileNames, defaultReaderAndWriter());
}
/** * Have a word segmenter segment a String into a list of words. * ONLY USE IF YOU LOADED A CHINESE WORD SEGMENTER!!!!! * * @param sentence The string to be classified * @return List of words */ // todo: This method is currently [2016] only called in a very small number of places: // the parser's jsp webapp, ChineseSegmenterAnnotator, and SegDemo. // Maybe we could eliminate it? // It also seems like it should be using the plainTextReaderAndWriter, not default? public List<String> segmentString(String sentence) { return segmentString(sentence, defaultReaderAndWriter()); }
/**
 * Builds an ObjectBank over the given file, reading it with the reader and
 * writer returned by {@link #defaultReaderAndWriter()}.
 *
 * @param filename Name of the file to read
 * @return The ObjectBank produced by the two-argument overload
 */
public ObjectBank<List<IN>> makeObjectBankFromFile(String filename) {
  DocumentReaderAndWriter<IN> readerAndWriter = defaultReaderAndWriter();
  return makeObjectBankFromFile(filename, readerAndWriter);
}
/**
 * Returns the DocumentReaderAndWriter used for reading plain text files for
 * runtime classification. It is the one specified by the
 * plainTextDocumentReaderAndWriter flag, which defaults to
 * {@code edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter}; that
 * reader is suitable for plain text files in languages with a Tokenizer
 * available.
 * <p>
 * The reader is allocated lazily on first use, since in many cases (such as
 * when AbstractSequenceClassifiers are used inside StanfordCoreNLP) these
 * DocumentReaderAndWriters are never needed. If flags.readerAndWriter is
 * non-null and equal to flags.plainTextDocumentReaderAndWriter, the default
 * reader and writer is reused rather than creating a second instance.
 * The method is synchronized for safe lazy initialization.
 *
 * @return The default plain text DocumentReaderAndWriter
 */
public synchronized DocumentReaderAndWriter<IN> plainTextReaderAndWriter() {
  if (plainTextReaderAndWriter != null) {
    return plainTextReaderAndWriter;
  }

  boolean sameAsDefault = flags.readerAndWriter != null
      && flags.readerAndWriter.equals(flags.plainTextDocumentReaderAndWriter);
  if (sameAsDefault) {
    plainTextReaderAndWriter = defaultReaderAndWriter();
  } else {
    plainTextReaderAndWriter = makePlainTextReaderAndWriter();
  }
  return plainTextReaderAndWriter;
}
public void train(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = true; train(makeObjectBankFromFile(filename, readerAndWriter), readerAndWriter); }
/**
 * Trains this classifier on a collection of sequences, using the reader and
 * writer returned by {@link #defaultReaderAndWriter()}.
 * The collection may be (and commonly is) an ObjectBank.
 *
 * @param docs An ObjectBank or any other collection of sequences of IN
 */
public void train(Collection<List<IN>> docs) {
  DocumentReaderAndWriter<IN> readerAndWriter = defaultReaderAndWriter();
  train(docs, readerAndWriter);
}
/** * Have a word segmenter segment a String into a list of words. * ONLY USE IF YOU LOADED A CHINESE WORD SEGMENTER!!!!! * * @param sentence The string to be classified * @return List of words */ // todo: This method is currently [2016] only called in a very small number of places: // the parser's jsp webapp, ChineseSegmenterAnnotator, and SegDemo. // Maybe we could eliminate it? // It also seems like it should be using the plainTextReaderAndWriter, not default? public List<String> segmentString(String sentence) { return segmentString(sentence, defaultReaderAndWriter()); }
/**
 * Builds an ObjectBank over the given file, reading it with the reader and
 * writer returned by {@link #defaultReaderAndWriter()}.
 *
 * @param filename Name of the file to read
 * @return The ObjectBank produced by the two-argument overload
 */
public ObjectBank<List<IN>> makeObjectBankFromFile(String filename) {
  DocumentReaderAndWriter<IN> readerAndWriter = defaultReaderAndWriter();
  return makeObjectBankFromFile(filename, readerAndWriter);
}
/**
 * Loads a test file, runs the classifier on it, and prints the answers to
 * stdout (with timing to stderr). The file is read with the reader and writer
 * returned by {@link #defaultReaderAndWriter()}; with the default settings
 * this is expected to be a tab-separated columns test file.
 *
 * @param testFile The file to test on.
 * @param outputScores Whether to calculate and then log performance scores (P/R/F1)
 * @return A Triple of P/R/F1 if outputScores is true, else null
 * @throws IOException Propagated from the overload this method delegates to
 */
public Triple<Double,Double,Double> classifyAndWriteAnswers(String testFile, boolean outputScores)
    throws IOException {
  DocumentReaderAndWriter<IN> readerAndWriter = defaultReaderAndWriter();
  return classifyAndWriteAnswers(testFile, readerAndWriter, outputScores);
}
/**
 * Trains the classifier from the training data named in flags. The first of
 * the following that is non-null wins: trainFiles (combined with
 * baseTrainDir), then trainFileList (a comma-separated list of file names),
 * and finally trainFile, which is used as the fallback without a null check.
 * Reading always goes through {@link #defaultReaderAndWriter()}.
 */
public void train() {
  if (flags.trainFiles != null) {
    train(flags.baseTrainDir, flags.trainFiles, defaultReaderAndWriter());
    return;
  }

  if (flags.trainFileList == null) {
    // Neither trainFiles nor trainFileList is set: fall back to the single train file.
    train(flags.trainFile, defaultReaderAndWriter());
    return;
  }

  String[] trainFileNames = flags.trainFileList.split(",");
  train(trainFileNames, defaultReaderAndWriter());
}
/**
 * Returns the DocumentReaderAndWriter used for reading plain text files for
 * runtime classification. It is the one specified by the
 * plainTextDocumentReaderAndWriter flag, which defaults to
 * {@code edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter}; that
 * reader is suitable for plain text files in languages with a Tokenizer
 * available.
 * <p>
 * The reader is allocated lazily on first use, since in many cases (such as
 * when AbstractSequenceClassifiers are used inside StanfordCoreNLP) these
 * DocumentReaderAndWriters are never needed. If flags.readerAndWriter is
 * non-null and equal to flags.plainTextDocumentReaderAndWriter, the default
 * reader and writer is reused rather than creating a second instance.
 * The method is synchronized for safe lazy initialization.
 *
 * @return The default plain text DocumentReaderAndWriter
 */
public synchronized DocumentReaderAndWriter<IN> plainTextReaderAndWriter() {
  if (plainTextReaderAndWriter != null) {
    return plainTextReaderAndWriter;
  }

  boolean sameAsDefault = flags.readerAndWriter != null
      && flags.readerAndWriter.equals(flags.plainTextDocumentReaderAndWriter);
  if (sameAsDefault) {
    plainTextReaderAndWriter = defaultReaderAndWriter();
  } else {
    plainTextReaderAndWriter = makePlainTextReaderAndWriter();
  }
  return plainTextReaderAndWriter;
}
public void train(String filename, DocumentReaderAndWriter<IN> readerAndWriter) { // only for the OCR data does this matter // flags.ocrTrain = true; train(makeObjectBankFromFile(filename, readerAndWriter), readerAndWriter); }