/**
 * Runs this task by delegating to {@code trainAndStoreModel} and afterwards to
 * {@code writeModelConfiguration}, both with the same task context.
 *
 * @param aContext
 *            the task context handed on to both steps
 * @throws Exception
 *             if either step fails
 */
@Override
public void execute(TaskContext aContext)
    throws Exception
{
    // Step 1: train the classifier and store the resulting artifacts.
    trainAndStoreModel(aContext);

    // Step 2: write the model configuration.
    writeModelConfiguration(aContext);
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // SvmHmm doesn't like negative values or zeros as dummy outcomes OUTCOME_PLACEHOLDER = "1"; }
/**
 * Trains a model on the training file and copies the outcome mapping and the feature
 * name mapping next to it. Multi-label learning mode is rejected up front.
 *
 * @param aContext
 *            the task context used to locate the training file and the mappings
 * @throws Exception
 *             if the learning mode is multi-label or any of the steps fails
 */
private void trainAndStoreModel(TaskContext aContext)
    throws Exception
{
    // Guard: multi-label is not supported by this adapter.
    if (learningMode.equals(Constants.LM_MULTI_LABEL)) {
        throw new TextClassificationException("Multi-label is not yet implemented");
    }

    trainModel(aContext, getTrainFile(aContext));

    copyOutcomeMappingToThisFolder(aContext);
    copyFeatureNameMappingToThisFolder(aContext);
}
/**
 * Writes the instances of the CAS to a feature file, runs the prediction on it and stores
 * the predicted value in each outcome annotation.
 * <p>
 * For regression the raw prediction line is stored. Otherwise every occurrence of
 * {@code ".0"} is removed from the line and the result is looked up in
 * {@code integer2OutcomeMapping}.
 *
 * @param aJCas
 *            the CAS whose outcome annotations receive the predictions
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while predicting
 */
@Override
public void process(JCas aJCas)
    throws AnalysisEngineProcessException
{
    try {
        File featureFile = createInputFile(aJCas);
        File predictionFile = runPrediction(featureFile);

        List<TextClassificationOutcome> outcomes = getOutcomeAnnotations(aJCas);
        List<String> predictedLines = FileUtils.readLines(predictionFile, UTF_8);
        checkErrorConditionNumberOfOutcomesEqualsNumberOfPredictions(outcomes, predictedLines);

        // Outcome annotations and prediction lines are matched by position.
        int position = 0;
        for (TextClassificationOutcome outcome : outcomes) {
            String label;
            if (!isRegression()) {
                String key = predictedLines.get(position).replaceAll("\\.0", "");
                label = integer2OutcomeMapping.get(key);
            }
            else {
                label = predictedLines.get(position);
            }
            outcome.setOutcome(label);
            position++;
        }
    }
    catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
/**
 * Initializes the component via the superclass, then loads the outcome mapping and the
 * feature mapping from {@code tcModelLocation} and verifies the TC version of the model.
 *
 * @param context
 *            the UIMA context passed on to the superclass
 * @throws ResourceInitializationException
 *             if the superclass fails, or wrapping any exception raised while loading
 *             the mappings or verifying the version
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException
{
    super.initialize(context);

    try {
        // Both mappings are read from the model location.
        integer2OutcomeMapping = loadInteger2OutcomeMapping(tcModelLocation);
        featureMapping = loadFeature2IntegerMapping(tcModelLocation);

        verifyTcVersion(tcModelLocation, getClass());
    }
    catch (Exception cause) {
        throw new ResourceInitializationException(cause);
    }
}
/**
 * Writes the instances extracted from the CAS into a temporary feature file.
 * <p>
 * Each instance becomes one line: the outcome placeholder, the value returned by
 * {@code injectSequenceId}, and then one tab-separated {@code index:value} entry per
 * feature that passes {@code sanityCheckValue}. The index is looked up in
 * {@code featureMapping} by feature name.
 * <p>
 * The writer is managed by try-with-resources so that the file handle is released even
 * when extraction or writing throws.
 *
 * @param jcas
 *            the CAS from which the instances are extracted
 * @return the temporary file, which is registered for deletion on JVM exit
 * @throws Exception
 *             if the file cannot be created or written, or instance extraction fails
 */
private File createInputFile(JCas jcas)
    throws Exception
{
    File tempFile = FileUtil.createTempFile("libsvm", ".txt");
    tempFile.deleteOnExit();

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(tempFile), "utf-8"))) {

        InstanceExtractor extractor = new InstanceExtractor(featureMode, featureExtractors,
                true);
        List<Instance> instances = extractor.getInstances(jcas, true);

        for (Instance instance : instances) {
            // The real outcome is unknown at prediction time, so a placeholder is written.
            bw.write(OUTCOME_PLACEHOLDER);
            bw.write(injectSequenceId(instance));

            for (Feature f : instance.getFeatures()) {
                if (!sanityCheckValue(f)) {
                    continue;
                }
                bw.write("\t");
                bw.write(featureMapping.get(f.getName()) + ":" + f.getValue());
            }
            bw.write("\n");
        }
    }

    return tempFile;
}
/**
 * Copies the outcome mapping file from the training data folder into
 * {@code outputFolder}. Does nothing for regression.
 *
 * @param aContext
 *            the task context used to resolve the training data folder
 * @throws IOException
 *             if the file cannot be copied
 */
private void copyOutcomeMappingToThisFolder(TaskContext aContext)
    throws IOException
{
    if (!isRegression()) {
        File sourceFolder = aContext.getFolder(TEST_TASK_INPUT_KEY_TRAINING_DATA,
                AccessMode.READONLY);
        String fileName = AdapterFormat.getOutcomeMappingFilename();

        File source = new File(sourceFolder, fileName);
        File target = new File(outputFolder, fileName);
        FileUtils.copyFile(source, target);
    }
}
/**
 * Reads the outcome mapping file in the given model location into a map.
 * <p>
 * Every line is split at tabs. The second column becomes the key and the first column
 * the value. For regression an empty map is returned and no file is read.
 *
 * @param tcModelLocation
 *            the folder that contains the outcome mapping file
 * @return the mapping from second column to first column, empty for regression
 * @throws IOException
 *             if the mapping file cannot be read
 */
private Map<String, String> loadInteger2OutcomeMapping(File tcModelLocation)
    throws IOException
{
    Map<String, String> idToOutcome = new HashMap<>();

    if (!isRegression()) {
        File mappingFile = new File(tcModelLocation,
                AdapterFormat.getOutcomeMappingFilename());

        for (String line : FileUtils.readLines(mappingFile, "utf-8")) {
            String[] columns = line.split("\t");
            idToOutcome.put(columns[1], columns[0]);
        }
    }

    return idToOutcome;
}
/**
 * Creates the feature file for the CAS, runs the prediction and transfers each predicted
 * line to the outcome annotation at the same position.
 * <p>
 * Regression stores the prediction line unchanged. Otherwise all occurrences of
 * {@code ".0"} are stripped from the line and the remainder is resolved through
 * {@code integer2OutcomeMapping}.
 *
 * @param jcas
 *            the CAS whose outcome annotations are updated
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised during prediction
 */
@Override
public void process(JCas jcas)
    throws AnalysisEngineProcessException
{
    try {
        File inputFile = createInputFile(jcas);
        File resultFile = runPrediction(inputFile);

        List<TextClassificationOutcome> annotations = getOutcomeAnnotations(jcas);
        List<String> resultLines = FileUtils.readLines(resultFile, "utf-8");
        checkErrorConditionNumberOfOutcomesEqualsNumberOfPredictions(annotations, resultLines);

        for (int idx = 0; idx < annotations.size(); idx++) {
            TextClassificationOutcome target;
            if (isRegression()) {
                String value = resultLines.get(idx);
                target = annotations.get(idx);
                target.setOutcome(value);
                continue;
            }

            // Classification: the prediction is a numeric id that maps to an outcome.
            String id = resultLines.get(idx).replaceAll("\\.0", "");
            String outcomeName = integer2OutcomeMapping.get(id);
            target = annotations.get(idx);
            target.setOutcome(outcomeName);
        }
    }
    catch (Exception failure) {
        throw new AnalysisEngineProcessException(failure);
    }
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // SvmHmm doesn't like negative values or zeros as dummy outcomes OUTCOME_PLACEHOLDER = "1"; }
/**
 * Trains the model from the training file and then copies the outcome mapping and the
 * feature name mapping. Throws before doing any work when the learning mode is
 * multi-label.
 *
 * @param aContext
 *            the task context passed to every step
 * @throws Exception
 *             if multi-label mode is configured or one of the steps fails
 */
protected void trainAndStoreModel(TaskContext aContext)
    throws Exception
{
    final boolean isMultiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    if (isMultiLabel) {
        throw new TextClassificationException("Multi-label is not yet implemented");
    }

    final File trainingFile = getTrainFile(aContext);
    trainModel(aContext, trainingFile);

    // Copy the mappings into this task's folder.
    copyOutcomeMappingToThisFolder(aContext);
    copyFeatureNameMappingToThisFolder(aContext);
}
/**
 * Runs the superclass initialization, loads the outcome mapping and the feature mapping
 * from {@code tcModelLocation}, and verifies the TC version of the model.
 *
 * @param context
 *            the UIMA context forwarded to the superclass
 * @throws ResourceInitializationException
 *             if the superclass fails, or wrapping any exception from loading or
 *             verification
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException
{
    super.initialize(context);

    try {
        integer2OutcomeMapping = loadInteger2OutcomeMapping(tcModelLocation);
        featureMapping = loadFeature2IntegerMapping(tcModelLocation);

        // The version check runs last, after both mappings are loaded.
        verifyTcVersion(tcModelLocation, getClass());
    }
    catch (Exception failure) {
        throw new ResourceInitializationException(failure);
    }
}
/**
 * Executes the task: first {@code trainAndStoreModel}, then
 * {@code writeModelConfiguration}.
 *
 * @param aContext
 *            the task context forwarded to both calls
 * @throws Exception
 *             if one of the calls fails
 */
@Override
public void execute(TaskContext aContext)
    throws Exception
{
    trainAndStoreModel(aContext);

    // The configuration is only written once training has completed without an exception.
    writeModelConfiguration(aContext);
}
/**
 * Dumps the instances extracted from the CAS into a temporary feature file.
 * <p>
 * One line is written per instance: the outcome placeholder, the result of
 * {@code injectSequenceId}, and a tab-separated {@code index:value} entry for each
 * feature accepted by {@code sanityCheckValue}. The index comes from
 * {@code featureMapping}, keyed by feature name.
 *
 * @param jcas
 *            the CAS that provides the instances
 * @return the temporary file, which is marked for deletion on JVM exit
 * @throws Exception
 *             if the file cannot be created or written, or extraction fails
 */
protected File createInputFile(JCas jcas)
    throws Exception
{
    File featureFile = FileUtil.createTempFile("libsvm", ".txt");
    featureFile.deleteOnExit();

    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(featureFile), UTF_8))) {

        InstanceExtractor instanceExtractor = new InstanceExtractor(featureMode,
                featureExtractors, true);

        for (Instance current : instanceExtractor.getInstances(jcas, true)) {
            writer.write(OUTCOME_PLACEHOLDER);
            writer.write(injectSequenceId(current));

            for (Feature feature : current.getFeatures()) {
                // Only features that pass the sanity check are written.
                if (sanityCheckValue(feature)) {
                    writer.write("\t");
                    writer.write(
                            featureMapping.get(feature.getName()) + ":" + feature.getValue());
                }
            }

            writer.write("\n");
        }
    }

    return featureFile;
}
/**
 * Copies the outcome mapping file from the training data folder to {@code outputFolder}.
 * Returns without copying for regression.
 *
 * @param aContext
 *            the task context that resolves the training data folder
 * @throws IOException
 *             if copying the file fails
 */
protected void copyOutcomeMappingToThisFolder(TaskContext aContext)
    throws IOException
{
    if (isRegression()) {
        // No outcome mapping is copied for regression.
        return;
    }

    File trainingFolder = aContext.getFolder(TEST_TASK_INPUT_KEY_TRAINING_DATA,
            AccessMode.READONLY);
    String mappingName = AdapterFormat.getOutcomeMappingFilename();

    File from = new File(trainingFolder, mappingName);
    File to = new File(outputFolder, mappingName);
    FileUtils.copyFile(from, to);
}
/**
 * Loads the outcome mapping file from the given model location.
 * <p>
 * Each line is split at tabs. The second column is used as key and the first column as
 * value. For regression the file is not read and an empty map is returned.
 *
 * @param tcModelLocation
 *            the folder holding the outcome mapping file
 * @return a map from second column to first column, empty for regression
 * @throws IOException
 *             if reading the mapping file fails
 */
protected Map<String, String> loadInteger2OutcomeMapping(File tcModelLocation)
    throws IOException
{
    Map<String, String> mapping = new HashMap<>();
    if (isRegression()) {
        return mapping;
    }

    File source = new File(tcModelLocation, AdapterFormat.getOutcomeMappingFilename());
    List<String> rows = FileUtils.readLines(source, UTF_8);

    for (int row = 0; row < rows.size(); row++) {
        String[] cells = rows.get(row).split("\t");
        mapping.put(cells[1], cells[0]);
    }

    return mapping;
}
/**
 * Initializes the component via the superclass and loads the model from the
 * {@code MODEL_CLASSIFIER} file inside {@code tcModelLocation}.
 *
 * @param context
 *            the UIMA context passed on to the superclass
 * @throws ResourceInitializationException
 *             if the superclass fails, or wrapping any exception raised while loading
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException
{
    super.initialize(context);

    try {
        File modelFile = new File(tcModelLocation, MODEL_CLASSIFIER);
        // The loader is called with the absolute path of the model file.
        model = svm.svm_load_model(modelFile.getAbsolutePath());
    }
    catch (Exception cause) {
        throw new ResourceInitializationException(cause);
    }
}
/**
 * Runs the superclass initialization and then loads the model stored as
 * {@code MODEL_CLASSIFIER} in {@code tcModelLocation}.
 *
 * @param context
 *            the UIMA context forwarded to the superclass
 * @throws ResourceInitializationException
 *             if the superclass fails, or wrapping any exception raised while loading
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException
{
    super.initialize(context);

    try {
        String modelPath = new File(tcModelLocation, MODEL_CLASSIFIER).getAbsolutePath();
        model = svm.svm_load_model(modelPath);
    }
    catch (Exception failure) {
        throw new ResourceInitializationException(failure);
    }
}
/**
 * Initializes the component via the superclass and loads the liblinear model from the
 * {@code MODEL_CLASSIFIER} file inside {@code tcModelLocation}.
 *
 * @param context
 *            the UIMA context passed on to the superclass
 * @throws ResourceInitializationException
 *             if the superclass fails, or wrapping any exception raised while loading
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException
{
    super.initialize(context);

    try {
        File modelFile = new File(tcModelLocation, MODEL_CLASSIFIER);
        liblinearModel = Linear.loadModel(modelFile);
    }
    catch (Exception cause) {
        throw new ResourceInitializationException(cause);
    }
}