private static CRFClassifier<CoreLabel> chooseCRFClassifier(SeqClassifierFlags flags) { CRFClassifier<CoreLabel> crf; // initialized in if/else if (flags.useFloat) { crf = new CRFClassifierFloat<>(flags); } else if (flags.nonLinearCRF) { crf = new CRFClassifierNonlinear<>(flags); } else if (flags.numLopExpert > 1) { crf = new CRFClassifierWithLOP<>(flags); } else if (flags.priorType.equals("DROPOUT")) { crf = new CRFClassifierWithDropout<>(flags); } else if (flags.useNoisyLabel) { crf = new CRFClassifierNoisyLabel<>(flags); } else { crf = new CRFClassifier<>(flags); } return crf; }
/**
 * Creates the dropout-aware objective function for training.
 * When {@code unsupDocs} has been populated, those documents are first converted
 * with {@code documentsToDataAndLabelsList} and the first component of every
 * resulting triple is collected into an array that is handed to the objective
 * function; otherwise {@code null} is passed in its place.
 *
 * @param data the training data array forwarded to the objective function
 * @param labels the training labels forwarded to the objective function
 * @return a new {@code CRFLogConditionalObjectiveFunctionWithDropout}
 */
@Override
protected CRFLogConditionalObjectiveFunction getObjectiveFunction(int[][][][] data, int[][] labels) {
  int[][][][] unsupervisedData = null;

  if (unsupDocs != null) {
    Timing stopwatch = new Timing();
    stopwatch.start();

    List<Triple<int[][][], int[], double[][][]>> converted = documentsToDataAndLabelsList(unsupDocs);
    unsupervisedData = new int[converted.size()][][][];
    int slot = 0;
    for (Triple<int[][][], int[], double[][][]> entry : converted) {
      // Only the first element of each triple is kept.
      unsupervisedData[slot] = entry.first();
      slot++;
    }

    long millis = stopwatch.stop();
    log.info("Time to read unsupervised dropout data: " + Timing.toSecondsString(millis) + " seconds, read " + unsupervisedData.length + " files");
  }

  return new CRFLogConditionalObjectiveFunctionWithDropout(
      data, labels, windowSize, classIndex, labelIndices, map,
      flags.priorType, flags.backgroundSymbol, flags.sigma, null,
      flags.dropoutRate, flags.dropoutScale, flags.multiThreadGrad,
      flags.dropoutApprox, flags.unsupDropoutScale, unsupervisedData);
}
/**
 * Loads the optional unsupervised dropout documents and decides which documents
 * are returned to the caller.
 * If {@code flags.unsupDropoutFile} is set, the file is read through
 * {@code makeObjectBankFromFile}; every token of every document gets
 * {@code flags.backgroundSymbol} stored under both {@code AnswerAnnotation} and
 * {@code GoldAnswerAnnotation}, and the documents are stored in {@code unsupDocs}.
 *
 * @param docs the documents supplied by the caller
 * @param readerAndWriter the reader used to parse the unsupervised dropout file
 * @return a new list holding {@code docs} followed by {@code unsupDocs} when
 *         {@code unsupDocs} is non-null and {@code flags.doFeatureDiscovery} is
 *         set; otherwise {@code docs} itself
 */
@Override
protected Collection<List<IN>> loadAuxiliaryData(Collection<List<IN>> docs, DocumentReaderAndWriter<IN> readerAndWriter) {
  if (flags.unsupDropoutFile != null) {
    log.info("Reading unsupervised dropout data from file: " + flags.unsupDropoutFile);
    Timing stopwatch = new Timing();
    stopwatch.start();

    unsupDocs = new ArrayList<>();
    ObjectBank<List<IN>> bank = makeObjectBankFromFile(flags.unsupDropoutFile, readerAndWriter);
    for (List<IN> document : bank) {
      for (IN token : document) {
        // Both the answer and the gold answer are set to the background symbol.
        token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
        token.set(CoreAnnotations.GoldAnswerAnnotation.class, flags.backgroundSymbol);
      }
      unsupDocs.add(document);
    }

    long millis = stopwatch.stop();
    log.info("Time to read: : " + Timing.toSecondsString(millis) + " seconds");
  }

  if (unsupDocs == null || !flags.doFeatureDiscovery) {
    return docs;
  }

  List<List<IN>> combined = new ArrayList<>();
  combined.addAll(docs);
  combined.addAll(unsupDocs);
  return combined;
}
/**
 * Creates the dropout-aware objective function for training.
 * When {@code unsupDocs} has been populated, those documents are first converted
 * with {@code documentsToDataAndLabelsList} and the first component of every
 * resulting triple is collected into an array that is handed to the objective
 * function; otherwise {@code null} is passed in its place. The conversion time
 * is reported on standard error.
 *
 * @param data the training data array forwarded to the objective function
 * @param labels the training labels forwarded to the objective function
 * @return a new {@code CRFLogConditionalObjectiveFunctionWithDropout}
 */
@Override
protected CRFLogConditionalObjectiveFunction getObjectiveFunction(int[][][][] data, int[][] labels) {
  int[][][][] unsupervisedData = null;

  if (unsupDocs != null) {
    Timing stopwatch = new Timing();
    stopwatch.start();

    List<Triple<int[][][], int[], double[][][]>> converted = documentsToDataAndLabelsList(unsupDocs);
    unsupervisedData = new int[converted.size()][][][];
    int slot = 0;
    for (Triple<int[][][], int[], double[][][]> entry : converted) {
      // Only the first element of each triple is kept.
      unsupervisedData[slot] = entry.first();
      slot++;
    }

    long millis = stopwatch.stop();
    System.err.println("Time to read unsupervised dropout data: " + Timing.toSecondsString(millis) + " seconds, read " + unsupervisedData.length + " files");
  }

  return new CRFLogConditionalObjectiveFunctionWithDropout(
      data, labels, windowSize, classIndex, labelIndices, map,
      flags.priorType, flags.backgroundSymbol, flags.sigma, null,
      flags.dropoutRate, flags.dropoutScale, flags.multiThreadGrad,
      flags.dropoutApprox, flags.unsupDropoutScale, unsupervisedData);
}
} // end class CRFClassifier
private static CRFClassifier<CoreLabel> chooseCRFClassifier(SeqClassifierFlags flags) { CRFClassifier<CoreLabel> crf; // initialized in if/else if (flags.useFloat) { crf = new CRFClassifierFloat<CoreLabel>(flags); } else if (flags.nonLinearCRF) { crf = new CRFClassifierNonlinear<CoreLabel>(flags); } else if (flags.numLopExpert > 1) { crf = new CRFClassifierWithLOP<CoreLabel>(flags); } else if (flags.priorType.equals("DROPOUT")) { crf = new CRFClassifierWithDropout<CoreLabel>(flags); } else if (flags.useNoisyLabel) { crf = new CRFClassifierNoisyLabel<CoreLabel>(flags); } else { crf = new CRFClassifier<CoreLabel>(flags); } return crf; }
/**
 * Loads the optional unsupervised dropout documents and decides which documents
 * are returned to the caller.
 * If {@code flags.unsupDropoutFile} is set, the file is read through
 * {@code makeObjectBankFromFile}; every token of every document gets
 * {@code flags.backgroundSymbol} stored under both {@code AnswerAnnotation} and
 * {@code GoldAnswerAnnotation}, and the documents are stored in {@code unsupDocs}.
 * Progress messages are written to standard error.
 *
 * @param docs the documents supplied by the caller
 * @param readerAndWriter the reader used to parse the unsupervised dropout file
 * @return a new list holding {@code docs} followed by {@code unsupDocs} when
 *         {@code unsupDocs} is non-null and {@code flags.doFeatureDiscovery} is
 *         set; otherwise {@code docs} itself
 */
@Override
protected Collection<List<IN>> loadAuxiliaryData(Collection<List<IN>> docs, DocumentReaderAndWriter<IN> readerAndWriter) {
  if (flags.unsupDropoutFile != null) {
    System.err.println("Reading unsupervised dropout data from file: " + flags.unsupDropoutFile);
    Timing stopwatch = new Timing();
    stopwatch.start();

    unsupDocs = new ArrayList<List<IN>>();
    ObjectBank<List<IN>> bank = makeObjectBankFromFile(flags.unsupDropoutFile, readerAndWriter);
    for (List<IN> document : bank) {
      for (IN token : document) {
        // Both the answer and the gold answer are set to the background symbol.
        token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
        token.set(CoreAnnotations.GoldAnswerAnnotation.class, flags.backgroundSymbol);
      }
      unsupDocs.add(document);
    }

    long millis = stopwatch.stop();
    System.err.println("Time to read: : " + Timing.toSecondsString(millis) + " seconds");
  }

  if (unsupDocs == null || !flags.doFeatureDiscovery) {
    return docs;
  }

  List<List<IN>> combined = new ArrayList<List<IN>>();
  combined.addAll(docs);
  combined.addAll(unsupDocs);
  return combined;
}
/**
 * Creates the dropout-aware objective function for training.
 * When {@code unsupDocs} has been populated, those documents are first converted
 * with {@code documentsToDataAndLabelsList} and the first component of every
 * resulting triple is collected into an array that is handed to the objective
 * function; otherwise {@code null} is passed in its place.
 *
 * @param data the training data array forwarded to the objective function
 * @param labels the training labels forwarded to the objective function
 * @return a new {@code CRFLogConditionalObjectiveFunctionWithDropout}
 */
@Override
protected CRFLogConditionalObjectiveFunction getObjectiveFunction(int[][][][] data, int[][] labels) {
  int[][][][] unsupervisedData = null;

  if (unsupDocs != null) {
    Timing stopwatch = new Timing();
    stopwatch.start();

    List<Triple<int[][][], int[], double[][][]>> converted = documentsToDataAndLabelsList(unsupDocs);
    unsupervisedData = new int[converted.size()][][][];
    int slot = 0;
    for (Triple<int[][][], int[], double[][][]> entry : converted) {
      // Only the first element of each triple is kept.
      unsupervisedData[slot] = entry.first();
      slot++;
    }

    long millis = stopwatch.stop();
    log.info("Time to read unsupervised dropout data: " + Timing.toSecondsString(millis) + " seconds, read " + unsupervisedData.length + " files");
  }

  return new CRFLogConditionalObjectiveFunctionWithDropout(
      data, labels, windowSize, classIndex, labelIndices, map,
      flags.priorType, flags.backgroundSymbol, flags.sigma, null,
      flags.dropoutRate, flags.dropoutScale, flags.multiThreadGrad,
      flags.dropoutApprox, flags.unsupDropoutScale, unsupervisedData);
}
private static CRFClassifier<CoreLabel> chooseCRFClassifier(SeqClassifierFlags flags) { CRFClassifier<CoreLabel> crf; // initialized in if/else if (flags.useFloat) { crf = new CRFClassifierFloat<>(flags); } else if (flags.nonLinearCRF) { crf = new CRFClassifierNonlinear<>(flags); } else if (flags.numLopExpert > 1) { crf = new CRFClassifierWithLOP<>(flags); } else if (flags.priorType.equals("DROPOUT")) { crf = new CRFClassifierWithDropout<>(flags); } else if (flags.useNoisyLabel) { crf = new CRFClassifierNoisyLabel<>(flags); } else { crf = new CRFClassifier<>(flags); } return crf; }
/**
 * Loads the optional unsupervised dropout documents and decides which documents
 * are returned to the caller.
 * If {@code flags.unsupDropoutFile} is set, the file is read through
 * {@code makeObjectBankFromFile}; every token of every document gets
 * {@code flags.backgroundSymbol} stored under both {@code AnswerAnnotation} and
 * {@code GoldAnswerAnnotation}, and the documents are stored in {@code unsupDocs}.
 *
 * @param docs the documents supplied by the caller
 * @param readerAndWriter the reader used to parse the unsupervised dropout file
 * @return a new list holding {@code docs} followed by {@code unsupDocs} when
 *         {@code unsupDocs} is non-null and {@code flags.doFeatureDiscovery} is
 *         set; otherwise {@code docs} itself
 */
@Override
protected Collection<List<IN>> loadAuxiliaryData(Collection<List<IN>> docs, DocumentReaderAndWriter<IN> readerAndWriter) {
  if (flags.unsupDropoutFile != null) {
    log.info("Reading unsupervised dropout data from file: " + flags.unsupDropoutFile);
    Timing stopwatch = new Timing();
    stopwatch.start();

    unsupDocs = new ArrayList<>();
    ObjectBank<List<IN>> bank = makeObjectBankFromFile(flags.unsupDropoutFile, readerAndWriter);
    for (List<IN> document : bank) {
      for (IN token : document) {
        // Both the answer and the gold answer are set to the background symbol.
        token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
        token.set(CoreAnnotations.GoldAnswerAnnotation.class, flags.backgroundSymbol);
      }
      unsupDocs.add(document);
    }

    long millis = stopwatch.stop();
    log.info("Time to read: : " + Timing.toSecondsString(millis) + " seconds");
  }

  if (unsupDocs == null || !flags.doFeatureDiscovery) {
    return docs;
  }

  List<List<IN>> combined = new ArrayList<>();
  combined.addAll(docs);
  combined.addAll(unsupDocs);
  return combined;
}