/**
 * Resolves the model jar file inside the directory named by the given path.
 *
 * @param directoryName path of the directory containing the trained model
 * @return the model jar file within that directory
 */
public static File getModelJarFile(String directoryName) {
  File directory = new File(directoryName);
  return getModelJarFile(directory);
}
/**
 * Recursively deletes training artifacts under the given directory while preserving
 * the packaged model jar file at each level.
 *
 * Note: {@code file.delete()} is also attempted on subdirectories after they are
 * cleaned; for directories still containing a model jar the call fails silently,
 * which matches the original best-effort behavior.
 *
 * @param directory the training directory to clean
 */
private void cleanTrainingFiles(File directory) {
  File modelJarFile = JarClassifierBuilder.getModelJarFile(directory);
  File[] files = directory.listFiles();
  if (files == null) {
    // listFiles() returns null when the path is not a directory or an I/O error
    // occurs; the original code would throw a NullPointerException here.
    return;
  }
  for (File file : files) {
    if (file.isDirectory()) {
      this.cleanTrainingFiles(file);
    }
    if (!file.equals(modelJarFile)) {
      file.delete();
    }
  }
}
/**
 * Create a classifier jar file from a directory where a classifier model was trained by
 * {@link #trainClassifier(File, String[])}.
 *
 * This method should typically not be overridden by subclasses - use
 * {@link #packageClassifier(File, JarOutputStream)} instead.
 *
 * @param dir
 *          The directory where the classifier model was trained.
 * @throws IOException
 *           if the model jar file cannot be written
 */
public void packageClassifier(File dir) throws IOException {
  // try-with-resources guarantees the stream is closed even when packaging throws;
  // the original explicit close() after the call leaked the file handle on failure
  try (JarOutputStream modelStream = new JarOutputStream(
      new BufferedOutputStream(new FileOutputStream(getModelJarFile(dir))), this.manifest)) {
    this.packageClassifier(dir, modelStream);
  }
}
/**
 * Locates the packaged classifier jar as a classpath resource relative to the
 * annotator class (resource name derived from the class's lower-cased simple name).
 *
 * @return the URL of the classifier jar resource
 * @throws MissingResourceException if no jar resource is found for the annotator class
 */
public URL getClassifierJarURL() {
  String dirName = getAnnotatorClass().getSimpleName().toLowerCase();
  // normalize Windows separators so the path is usable as a classpath resource name
  String resourceName =
      JarClassifierBuilder.getModelJarFile(dirName).getPath().replaceAll("\\\\", "/");
  URL url = this.getAnnotatorClass().getResource(resourceName);
  if (url != null) {
    return url;
  }
  String className = this.getAnnotatorClass().getName();
  String message = String.format(
      "No classifier jar found at \"%s\" for class %s", resourceName, className);
  throw new MissingResourceException(message, className, resourceName);
}
/**
 * Load a classifier packaged by {@link #packageClassifier(File)} from the jar file in the
 * training directory.
 *
 * This method should typically not be overridden by subclasses - use
 * {@link #unpackageClassifier(JarInputStream)} and {@link #newClassifier()} instead.
 *
 * @param dir
 *          The directory where the classifier was trained and packaged.
 * @return The loaded classifier.
 * @throws IOException
 *           if the model jar cannot be read
 */
public CLASSIFIER_TYPE loadClassifierFromTrainingDirectory(File dir) throws IOException {
  File modelJarFile = getModelJarFile(dir);
  // try-with-resources replaces the manual try/finally close with the same guarantee
  try (InputStream inputStream = new BufferedInputStream(new FileInputStream(modelJarFile))) {
    return this.loadClassifier(inputStream);
  }
}
/**
 * Builds an analysis engine description for the annotator class, pointing the
 * classifier-jar parameter at the model packaged under the given directory and params.
 *
 * @param directory base directory containing trained models
 * @param params model parameters selecting the model subdirectory
 * @return the configured analysis engine description
 * @throws ResourceInitializationException if the description cannot be created
 */
public AnalysisEngineDescription getAnnotatorDescription(File directory, Model.Params params)
    throws ResourceInitializationException {
  File modelDirectory = this.getModelDirectory(directory, params);
  File modelJar = JarClassifierBuilder.getModelJarFile(modelDirectory);
  return AnalysisEngineFactory.createEngineDescription(
      this.annotatorClass,
      GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
      modelJar);
}
/**
 * Creates one underlying sequence classifier per tier, each loaded from the model
 * jar packaged in that tier's subdirectory of the base model directory.
 *
 * @param modelBaseDir base directory containing one subdirectory per tier
 * @param tierIds identifiers of the tiers to load classifiers for
 * @return the loaded classifiers, in tier-id iteration order
 * @throws IOException if a classifier jar cannot be read
 */
private static List<SequenceClassifier<String>> createUnderlyingClassifiers(
    File modelBaseDir, Iterable<String> tierIds) throws IOException {
  List<SequenceClassifier<String>> classifiers = Lists.newArrayList();
  for (String tier : tierIds) {
    File tierJar = JarClassifierBuilder.getModelJarFile(new File(modelBaseDir, tier));
    JarSequenceClassifierFactory<String> factory = new JarSequenceClassifierFactory<>();
    factory.setClassifierJarPath(tierJar.getPath());
    // fully-qualified name kept as in the original to avoid import ambiguity
    org.cleartk.ml.SequenceClassifier<String> classifier = factory.createClassifier();
    classifiers.add(classifier);
  }
  return classifiers;
}
/**
 * Runs the trained modifier-extraction model over the reader's documents and scores
 * system {@code Modifier} annotations against those in the gold view.
 *
 * @param collectionReader source of documents to evaluate
 * @param directory training directory containing the packaged model jar
 * @return statistics comparing gold and system modifiers (also printed to stderr)
 */
@Override
protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
    throws Exception {
  AnalysisEngine classifierAnnotator =
      AnalysisEngineFactory.createEngine(ModifierExtractorAnnotator.getDescription(
          GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
          JarClassifierBuilder.getModelJarFile(directory)));
  AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
  Iterator<JCas> casIter = new JCasIterator(collectionReader, classifierAnnotator);
  while (casIter.hasNext()) {
    JCas jCas = casIter.next();
    JCas goldView;
    try {
      goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
    } catch (CASException e) {
      throw new AnalysisEngineProcessException(e);
    }
    stats.add(
        JCasUtil.select(goldView, Modifier.class),
        JCasUtil.select(jCas, Modifier.class));
  }
  System.err.print(stats);
  System.err.println();
  return stats;
}
/**
 * Evaluates the modifier-extraction classifier: processes each CAS from the reader,
 * then compares the system's {@code Modifier} annotations with the gold view's.
 *
 * @param collectionReader source of documents to evaluate
 * @param directory training directory containing the packaged model jar
 * @return statistics comparing gold and system modifiers (also printed to stderr)
 */
@Override
protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
    throws Exception {
  AnalysisEngine annotator =
      AnalysisEngineFactory.createEngine(ModifierExtractorAnnotator.getDescription(
          GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
          JarClassifierBuilder.getModelJarFile(directory)));
  AnnotationStatistics<String> statistics = new AnnotationStatistics<String>();
  for (Iterator<JCas> iterator = new JCasIterator(collectionReader, annotator);
      iterator.hasNext();) {
    JCas systemView = iterator.next();
    JCas goldView;
    try {
      goldView = systemView.getView(SHARPXMI.GOLD_VIEW_NAME);
    } catch (CASException e) {
      throw new AnalysisEngineProcessException(e);
    }
    Collection<Modifier> gold = JCasUtil.select(goldView, Modifier.class);
    Collection<Modifier> system = JCasUtil.select(systemView, Modifier.class);
    statistics.add(gold, system);
  }
  // print(stats) followed by println() == println(stats): same bytes on stderr
  System.err.println(statistics);
  return statistics;
}
/**
 * Writes the superclass's jar entries, then adds the delegated model jar and the
 * outcome feature extractors file as additional entries.
 *
 * @param dir the training directory
 * @param modelStream the jar stream being written
 * @throws IOException if an entry cannot be written
 */
@Override
public void packageClassifier(File dir, JarOutputStream modelStream) throws IOException {
  super.packageClassifier(dir, modelStream);
  File delegatedModelJar =
      JarClassifierBuilder.getModelJarFile(this.getDelegatedModelDirectory(dir));
  JarStreams.putNextJarEntry(modelStream, DELEGATED_MODEL_FILE_NAME, delegatedModelJar);
  File extractorsFile = getOutcomeFeatureExtractorsFile(dir);
  JarStreams.putNextJarEntry(modelStream, OUTCOME_FEATURE_EXTRACTOR_FILE_NAME, extractorsFile);
}
File modelFile = JarClassifierBuilder.getModelJarFile(trainingDirectory); if (!file.isDirectory() && !file.equals(modelFile)) { file.delete();
if (modelDir.exists()) { for (File file : modelDir.listFiles()) { File modelFile = JarClassifierBuilder.getModelJarFile(modelDir); if (!file.isDirectory() && !file.equals(modelFile)) { file.delete();
+ JarClassifierBuilder.getModelJarFile(ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY).getPath());
public AggregateBuilder buildExtractAggregate() throws ResourceInitializationException { AggregateBuilder builder = new AggregateBuilder(); builder.add(AnalysisEngineFactory.createEngineDescription( XMIReader.class, XMIReader.PARAM_XMI_DIRECTORY, this.xmiDirectory)); // This will extract the features for summarization builder.add(AnalysisEngineFactory.createEngineDescription( SumBasicAnnotator.class, CleartkAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(this.modelDirectory), SumBasicAnnotator.PARAM_TOKEN_FIELD, this.tokenField.name(), SumBasicAnnotator.PARAM_STOPWORDS_URI, this.stopwordsFile.toURI())); if (this.sentencesOutFile != null && this.outputSentences) { builder.add(SummarySentenceWriterAnnotator.getDescription(sentencesOutFile, this.outputScores)); } return builder; }
.getClassifierDescription(JarClassifierBuilder.getModelJarFile( ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY).getPath()));
this.getXMIDirectory(directory, Stage.TEST).getPath())); for (ModelInfo<?> modelInfo : this.modelInfos) { File modelFile = JarClassifierBuilder.getModelJarFile(modelInfo.getModelSubdirectory(directory)); builder.add(modelInfo.modelFactory.getAnnotatorDescription(modelFile.getPath()));
false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(modelDirectory));
public static void main(String[] args) throws Exception { Options options = CliFactory.parseArguments(Options.class, args); // a reader that loads the URIs of the text file CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(options.getTextFile())); // assemble the classification pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // annotators that identify sentences, tokens and part-of-speech tags in the text aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to classify on the new texts aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(options.getModelDirectory()))); // a very simple annotator that just prints out any named entities we found aggregate.add(AnalysisEngineFactory.createEngineDescription(PrintNamedEntityMentions.class)); // run the classification pipeline on the new texts SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); }
false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(modelDirectory)));
false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(options.getModelsDirectory())));