public void train(Annotation doc) { List<List<CoreLabel>> trainingSet = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO); if(SAVE_CONLL_2003){ // dump a file in CoNLL-2003 format try { PrintStream os = new PrintStream(new FileOutputStream("train.conll")); // saveCoNLLFiles("/tmp/ace/train/", doc, useSubTypes, useBIO); saveCoNLL(os, trainingSet, useBIO); os.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } this.classifier = createClassifier(); if (trainingSet.size() > 0) { this.classifier.train(Collections.unmodifiableCollection(trainingSet)); } }
/**
 * Trains the segmenter on the file named by {@code flags.trainFile}.
 * The training data must carry gold segmentation markers; the reader is
 * configured to expect both segmentation markers and tags.
 */
public void train() {
  final boolean withSegmentationMarkers = true;
  final boolean withTags = true;

  // One reader/writer instance is shared by the object bank and the trainer.
  DocumentReaderAndWriter<CoreLabel> readerWriter =
      new ArabicDocumentReaderAndWriter(
          withSegmentationMarkers, withTags, hasDomainLabels, domain, noRewrites, tf);

  ObjectBank<List<CoreLabel>> trainingLines =
      classifier.makeObjectBankFromFile(flags.trainFile, readerWriter);

  classifier.train(trainingLines, readerWriter);
  log.info("Finished training.");
}
// Run training, then set the maximum size of crf.knownLCWords to knownLCWordsLimit,
// and finally report the elapsed time for "CRFClassifier training" through the logger.
crf.train(); crf.knownLCWords.setMaxSize(knownLCWordsLimit); timing.done(log, "CRFClassifier training");
public void train(Annotation doc) { List<List<CoreLabel>> trainingSet = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO); if(SAVE_CONLL_2003){ // dump a file in CoNLL-2003 format try { PrintStream os = new PrintStream(new FileOutputStream("train.conll")); // saveCoNLLFiles("/tmp/ace/train/", doc, useSubTypes, useBIO); saveCoNLL(os, trainingSet, useBIO); os.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } this.classifier = createClassifier(); if (trainingSet.size() > 0) { this.classifier.train(Collections.unmodifiableCollection(trainingSet)); } }
public void train(Annotation doc) { List<List<CoreLabel>> trainingSet = AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO); if(SAVE_CONLL_2003){ // dump a file in CoNLL-2003 format try { PrintStream os = new PrintStream(new FileOutputStream("train.conll")); // saveCoNLLFiles("/tmp/ace/train/", doc, useSubTypes, useBIO); saveCoNLL(os, trainingSet, useBIO); os.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } this.classifier = createClassifier(); if (trainingSet.size() > 0) { this.classifier.train(Collections.unmodifiableCollection(trainingSet)); } }
/**
 * Trains the segmenter on the file named by {@code flags.trainFile}.
 * The training data must carry gold segmentation markers; the reader is
 * configured to expect both segmentation markers and tags. A completion
 * message is written to standard error when training returns.
 */
public void train() {
  final boolean withSegmentationMarkers = true;
  final boolean withTags = true;

  // One reader/writer instance is shared by the object bank and the trainer.
  DocumentReaderAndWriter<CoreLabel> readerWriter =
      new ArabicDocumentReaderAndWriter(
          withSegmentationMarkers, withTags, hasDomainLabels, domain, noRewrites, tf);

  ObjectBank<List<CoreLabel>> trainingLines =
      classifier.makeObjectBankFromFile(flags.trainFile, readerWriter);

  classifier.train(trainingLines, readerWriter);
  System.err.println("Finished training.");
}
/**
 * Trains the segmenter on the file named by {@code flags.trainFile}.
 * The training data must carry gold segmentation markers; the reader is
 * configured to expect both segmentation markers and tags.
 */
public void train() {
  final boolean withSegmentationMarkers = true;
  final boolean withTags = true;

  // One reader/writer instance is shared by the object bank and the trainer.
  DocumentReaderAndWriter<CoreLabel> readerWriter =
      new ArabicDocumentReaderAndWriter(
          withSegmentationMarkers, withTags, hasDomainLabels, domain, noRewrites, tf);

  ObjectBank<List<CoreLabel>> trainingLines =
      classifier.makeObjectBankFromFile(flags.trainFile, readerWriter);

  classifier.train(trainingLines, readerWriter);
  log.info("Finished training.");
}
/**
 * Trains a new CRF classifier from a nested matrix of labelled tokens.
 *
 * <p>Each inner list is turned into one token sequence; for each map in it, the
 * {@code "Token"} entry becomes the label's text and the {@code "Class"} entry
 * becomes its answer. The classifier is built with a fixed feature configuration
 * and stored in {@code crf}.
 *
 * @param listMatrix outer list of sequences, each a list of token maps
 * @throws Exception declared by the signature; propagated from the calls made here
 */
public void train(ListMatrix<ListMatrix<MapMatrix<String, String>>> listMatrix) throws Exception {
  // Convert the matrix structure into the List<List<CoreLabel>> the trainer expects.
  List<List<CoreLabel>> sentences = new ArrayList<List<CoreLabel>>();
  for (ListMatrix<MapMatrix<String, String>> sentence : listMatrix) {
    List<CoreLabel> tokens = new ArrayList<CoreLabel>();
    for (MapMatrix<String, String> row : sentence) {
      CoreLabel token = new CoreLabel();
      token.set(CoreAnnotations.TextAnnotation.class, row.getAsString("Token"));
      token.set(CoreAnnotations.AnswerAnnotation.class, row.getAsString("Class"));
      tokens.add(token);
    }
    sentences.add(tokens);
  }

  SeqClassifierFlags classifierFlags = new SeqClassifierFlags();

  // Numeric / enumerated settings.
  classifierFlags.maxLeft = 3;
  classifierFlags.maxNGramLeng = 6;
  classifierFlags.wordShape = WordShapeClassifier.WORDSHAPECHRIS2;

  // Word-level and context features.
  classifierFlags.useClassFeature = true;
  classifierFlags.useWord = true;
  classifierFlags.useNGrams = true;
  classifierFlags.usePrev = true;
  classifierFlags.useNext = true;
  classifierFlags.useDisjunctive = true;

  // Sequence-level features.
  classifierFlags.useSequences = true;
  classifierFlags.usePrevSequences = true;
  classifierFlags.useTypeSeqs = true;
  classifierFlags.useTypeSeqs2 = true;
  classifierFlags.useTypeySequences = true;

  crf = new CRFClassifier<CoreLabel>(classifierFlags);
  crf.train(sentences, null);
}
} else if (crf.flags.trainFile != null || crf.flags.trainFileList != null) { Timing timing = new Timing(); crf.train(); timing.done("CRFClassifier training"); } else {
// Run training, then set the maximum size of crf.knownLCWords to knownLCWordsLimit,
// and finally report the elapsed time for "CRFClassifier training" through the logger.
crf.train(); crf.knownLCWords.setMaxSize(knownLCWordsLimit); timing.done(log, "CRFClassifier training");
crf.loadJarClassifier(crf.flags.loadJarClassifier, props); } else if (crf.flags.trainFile != null || crf.flags.trainFileList != null) { crf.train(); } else { crf.loadDefaultClassifier();