/**
 * Trains and packages a classifier from the training data found in a directory.
 *
 * The directory is expected to already hold training data as written by a {@link DataWriter}
 * or {@link SequenceDataWriter}.
 *
 * @param directory
 *          The directory containing the training data.
 * @param trainingArguments
 *          Additional command-line arguments that are forwarded unchanged to the classifier
 *          trainer.
 * @throws Exception
 *           If {@link JarClassifierBuilder#trainAndPackage(File, String...)} fails.
 */
public static void main(File directory, String... trainingArguments) throws Exception {
  // the whole job is delegated; the array is forwarded as the varargs parameter as-is
  final String[] forwardedArguments = trainingArguments;
  JarClassifierBuilder.trainAndPackage(directory, forwardedArguments);
}
/**
 * Convenience overload of {@code getModelJarFile(File)} for callers that hold the directory
 * as a path string.
 *
 * @param directoryName
 *          Path of the directory, passed to {@link File#File(String)}.
 * @return Whatever {@code getModelJarFile(File)} returns for that directory.
 */
public static File getModelJarFile(String directoryName) {
  File directory = new File(directoryName);
  return getModelJarFile(directory);
}
/**
 * Loads the classifier stored in the jar identified by {@code classifierJarPath}.
 *
 * The path is resolved in three steps: first as a classpath resource relative to this object's
 * class, then as a URL, and finally - only if the path is not a well-formed URL - as a file
 * system path.
 *
 * @return The classifier loaded from the jar, cast to the class returned by
 *         {@code getClassifierClass()}.
 * @throws IOException
 *           If the jar cannot be opened or read.
 */
public CLASSIFIER_TYPE createClassifier() throws IOException {
  InputStream stream = this.getClass().getResourceAsStream(this.classifierJarPath);
  if (stream == null) {
    try {
      stream = new URL(this.classifierJarPath).openStream();
    } catch (MalformedURLException e) {
      // not a URL, so treat the path as a plain file name
      stream = new FileInputStream(this.classifierJarPath);
    }
  }
  stream = new BufferedInputStream(stream);
  // everything that reads from the stream is inside the try, so the stream is released even
  // when the jar header or manifest cannot be read or no builder can be created from it
  try {
    JarInputStream modelStream = new JarInputStream(stream);
    JarClassifierBuilder<?> builder = JarClassifierBuilder.fromManifest(modelStream.getManifest());
    return this.getClassifierClass().cast(builder.loadClassifier(modelStream));
  } finally {
    stream.close();
  }
}
/**
 * Trains a classifier from a training directory and packages the result.
 *
 * The builder is obtained via {@link #fromTrainingDirectory(File)}; it is then asked to
 * {@link #trainClassifier(File, String...)} and finally to {@link #packageClassifier(File)},
 * all on the same directory.
 *
 * @param trainingDirectory
 *          The directory where {@link #saveToTrainingDirectory(File)} has saved the model
 *          training files.
 * @param trainingArguments
 *          Any additional arguments that should be passed to the classifier trainer via
 *          {@link #trainClassifier(File, String...)}.
 * @throws Exception
 *           If loading the builder, training, or packaging fails.
 */
public static void trainAndPackage(File trainingDirectory, String... trainingArguments)
    throws Exception {
  final JarClassifierBuilder<?> builder = fromTrainingDirectory(trainingDirectory);

  // step 1: train the model inside the directory
  builder.trainClassifier(trainingDirectory, trainingArguments);

  // step 2: package the trained model
  builder.packageClassifier(trainingDirectory);
}
/** * Load a classifier packaged by {@link #packageClassifier(File)} from an {@link InputStream}. * * This method should typically not be overridden by subclasses - use * {@link #unpackageClassifier(JarInputStream)} and {@link #newClassifier()} instead. * * @param inputStream * The classifier stream. * @return The loaded classifier. */ public CLASSIFIER_TYPE loadClassifier(InputStream inputStream) throws IOException { // if it's already a Jar stream, don't re-wrap it if (inputStream instanceof JarInputStream) { JarInputStream modelStream = (JarInputStream) inputStream; this.unpackageClassifier(modelStream); return this.newClassifier(); } // if we need to wrap it in a Jar stream, be sure to close the stream afterwards else { JarInputStream modelStream = new JarInputStream(inputStream); try { this.unpackageClassifier(modelStream); return this.newClassifier(); } finally { modelStream.close(); } } }
/**
 * Loads a classifier builder from manifest in the training directory.
 *
 * @param dir
 *          The training directory where the classifier builder was written by a call to
 *          {@link #saveToTrainingDirectory(File)}. This is typically the same directory as was
 *          used for {@link DirectoryDataWriterFactory#PARAM_OUTPUT_DIRECTORY}.
 * @return A new classifier builder.
 * @throws IOException
 *           If the manifest file cannot be opened or parsed.
 */
public static JarClassifierBuilder<?> fromTrainingDirectory(File dir) throws IOException {
  InputStream stream = new BufferedInputStream(new FileInputStream(getManifestFile(dir)));
  Manifest manifest;
  // close the file even when the manifest is malformed and parsing throws
  try {
    manifest = new Manifest(stream);
  } finally {
    stream.close();
  }
  return fromManifest(manifest);
}
JarClassifierBuilder.trainAndPackage(modelDir, modelInfo.trainingArguments); if (modelDir.exists()) { for (File file : modelDir.listFiles()) { File modelFile = JarClassifierBuilder.getModelJarFile(modelDir); if (!file.isDirectory() && !file.equals(modelFile)) { file.delete();
/**
 * Create a classifier jar file from a directory where a classifier model was trained by
 * {@link #trainClassifier(File, String[])}.
 *
 * The jar is written to the file returned by {@code getModelJarFile(dir)} and starts with this
 * builder's manifest.
 *
 * This method should typically not be overridden by subclasses - use
 * {@link #packageClassifier(File, JarOutputStream)} instead.
 *
 * @param dir
 *          The directory where the classifier model was trained.
 * @throws IOException
 *           If the jar file cannot be created or written.
 */
public void packageClassifier(File dir) throws IOException {
  JarOutputStream modelStream = new JarOutputStream(new BufferedOutputStream(
      new FileOutputStream(getModelJarFile(dir))), this.manifest);
  // release the file handle even when writing one of the entries fails
  try {
    this.packageClassifier(dir, modelStream);
  } finally {
    modelStream.close();
  }
}
/**
 * Load a classifier packaged by {@link #packageClassifier(File)} from the jar file in the
 * training directory.
 *
 * The jar file is located with {@code getModelJarFile(dir)}, read through
 * {@link #loadClassifier(InputStream)}, and closed before this method returns.
 *
 * This method should typically not be overridden by subclasses - use
 * {@link #unpackageClassifier(JarInputStream)} and {@link #newClassifier()} instead.
 *
 * @param dir
 *          The directory where the classifier was trained and packaged.
 * @return The loaded classifier.
 * @throws IOException
 *           If the jar file cannot be opened or read.
 */
public CLASSIFIER_TYPE loadClassifierFromTrainingDirectory(File dir) throws IOException {
  InputStream jarFileStream = new BufferedInputStream(new FileInputStream(getModelJarFile(dir)));
  try {
    CLASSIFIER_TYPE classifier = this.loadClassifier(jarFileStream);
    return classifier;
  } finally {
    jarFileStream.close();
  }
}
public static void main(String... args) throws Exception { String programName = BuildJar.class.getName(); String usage = String.format("usage: java %s DIR\n\n" + "The directory DIR should contain the MANIFEST.MF file as created by\n" + "a classifier DataWriter, and all files created by training a model\n" + "from the corresponding training-data.xxx file\n", programName); // usage message for wrong number of arguments if (args.length != 1) { System.err.format("error: wrong number of arguments\n%s", usage); System.exit(1); } File dir = new File(args[0]); // get the classifier builder from the training directory JarClassifierBuilder<?> classifierBuilder = JarClassifierBuilder.fromTrainingDirectory(dir); classifierBuilder.packageClassifier(dir); } }
/**
 * Reads the superclass's entries, then advances to the {@code MODEL_NAME} entry, decodes its
 * content as US-ASCII text and stores the parsed outcome in {@code value}.
 *
 * @param modelStream
 *          The jar stream to read the model from.
 * @throws IOException
 *           If the stream cannot be read.
 */
@Override
protected void unpackageClassifier(JarInputStream modelStream) throws IOException {
  super.unpackageClassifier(modelStream);
  JarStreams.getNextJarEntry(modelStream, MODEL_NAME);

  // the reader is not closed here: closing it would also close the underlying jar stream
  String outcomeText = CharStreams.toString(new InputStreamReader(modelStream, Charsets.US_ASCII));
  this.value = this.parseOutcome(outcomeText);
}
/**
 * Writes the superclass's entries, then adds the {@code MODEL_NAME} file from the training
 * directory to the jar under the entry name {@code MODEL_NAME}.
 *
 * @param dir
 *          The directory containing the model file.
 * @param modelStream
 *          The jar stream being written.
 * @throws IOException
 *           If an entry cannot be written.
 */
@Override
protected void packageClassifier(File dir, JarOutputStream modelStream) throws IOException {
  super.packageClassifier(dir, modelStream);

  File modelFile = new File(dir, MODEL_NAME);
  JarStreams.putNextJarEntry(modelStream, MODEL_NAME, modelFile);
}
JarClassifierBuilder.trainAndPackage(trainingDirectory); File modelFile = JarClassifierBuilder.getModelJarFile(trainingDirectory); if (!file.isDirectory() && !file.equals(modelFile)) { file.delete();
/**
 * Writes the superclass's entries, then adds two further entries to the jar: the model jar of
 * the delegated classifier (under {@code DELEGATED_MODEL_FILE_NAME}) and the outcome feature
 * extractors file (under {@code OUTCOME_FEATURE_EXTRACTOR_FILE_NAME}).
 *
 * @param dir
 *          The training directory.
 * @param modelStream
 *          The jar stream being written.
 * @throws IOException
 *           If an entry cannot be written.
 */
@Override
public void packageClassifier(File dir, JarOutputStream modelStream) throws IOException {
  super.packageClassifier(dir, modelStream);

  // entry 1: the delegated classifier's own model jar
  File delegatedModelJar = JarClassifierBuilder.getModelJarFile(this.getDelegatedModelDirectory(dir));
  JarStreams.putNextJarEntry(modelStream, DELEGATED_MODEL_FILE_NAME, delegatedModelJar);

  // entry 2: the outcome feature extractors file
  File extractorsFile = getOutcomeFeatureExtractorsFile(dir);
  JarStreams.putNextJarEntry(modelStream, OUTCOME_FEATURE_EXTRACTOR_FILE_NAME, extractorsFile);
}
/**
 * Reads the superclass's entries, then advances to the {@code ENCODERS_FILE_NAME} entry and
 * deserializes two objects from it: the features encoder followed by the outcome encoder.
 *
 * @param modelStream
 *          The jar stream to read from.
 * @throws IOException
 *           If the stream cannot be read.
 * @throws RuntimeException
 *           If a class of a serialized encoder object cannot be found.
 */
@Override
protected void unpackageClassifier(JarInputStream modelStream) throws IOException {
  super.unpackageClassifier(modelStream);
  JarStreams.getNextJarEntry(modelStream, ENCODERS_FILE_NAME);

  ObjectInputStream encoderStream = new ObjectInputStream(modelStream);
  try {
    // each encoder is assigned as soon as it is read; features first, then outcomes
    Object serializedFeaturesEncoder = encoderStream.readObject();
    this.featuresEncoder = this.featuresEncoderCast(serializedFeaturesEncoder);

    Object serializedOutcomeEncoder = encoderStream.readObject();
    this.outcomeEncoder = this.outcomeEncoderCast(serializedOutcomeEncoder);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException("Classes not found for serialized encoder objects", e);
  }
}
/**
 * Writes the superclass's entries, then adds the encoders file of the training directory to
 * the jar under the entry name {@code ENCODERS_FILE_NAME}.
 *
 * @param dir
 *          The training directory.
 * @param modelStream
 *          The jar stream being written.
 * @throws IOException
 *           If an entry cannot be written.
 */
@Override
protected void packageClassifier(File dir, JarOutputStream modelStream) throws IOException {
  super.packageClassifier(dir, modelStream);

  File encodersFile = getEncodersFile(dir);
  JarStreams.putNextJarEntry(modelStream, ENCODERS_FILE_NAME, encodersFile);
}
/**
 * Writes training data for the modifier extractor into a directory and then trains and
 * packages the classifier there.
 *
 * A header naming this class and the directory, followed by the parameter settings, is printed
 * to standard error first. The pipeline then runs {@code OnlyGoldModifiers} followed by the
 * {@code ModifierExtractorAnnotator} configured with the data writer class from the parameter
 * settings and the directory as output location.
 *
 * @param collectionReader
 *          The reader supplying the training documents.
 * @param directory
 *          The directory that receives the training data and the packaged model.
 * @throws Exception
 *           If the pipeline, the training, or the packaging fails.
 */
@Override
public void train(CollectionReader collectionReader, File directory) throws Exception {
  // progress header on stderr
  String evaluationName = this.getClass().getSimpleName();
  String directoryName = directory.getName();
  System.err.printf("%s: %s:\n", evaluationName, directoryName);
  System.err.println(this.parameterSettings);

  // write the training data
  SimplePipeline.runPipeline(
      collectionReader,
      AnalysisEngineFactory.createEngineDescription(OnlyGoldModifiers.class),
      ModifierExtractorAnnotator.getDescription(
          DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
          this.parameterSettings.dataWriterClass,
          DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
          directory.getPath()));

  // train on the written data and package the model
  JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
}
/**
 * Recursively deletes the contents of a training directory, keeping only the model jar file
 * that {@code JarClassifierBuilder.getModelJarFile} reports for each directory visited.
 *
 * Sub-directories are cleaned first and then deleted themselves; {@link File#delete()} fails
 * silently for a sub-directory that still contains files (for example its own model jar).
 * Does nothing if {@code directory} is not a directory or its contents cannot be listed.
 *
 * @param directory
 *          The directory to clean.
 */
private void cleanTrainingFiles(File directory) {
  File modelJarFile = JarClassifierBuilder.getModelJarFile(directory);
  File[] files = directory.listFiles();
  // listFiles() returns null when the path is not a directory or an I/O error occurs
  if (files == null) {
    return;
  }
  for (File file : files) {
    if (file.isDirectory()) {
      this.cleanTrainingFiles(file);
    }
    if (!file.equals(modelJarFile)) {
      file.delete();
    }
  }
}
/**
 * Reads two entries from the jar stream, in order: the nested jar of the delegated classifier
 * ({@code DELEGATED_MODEL_FILE_NAME}) and the serialized outcome feature extractors
 * ({@code OUTCOME_FEATURE_EXTRACTOR_FILE_NAME}).
 *
 * The delegated classifier is loaded by the builder named in the nested jar's manifest.
 *
 * @param modelStream
 *          The jar stream to read from.
 * @throws IOException
 *           If the stream cannot be read, or if the class of the serialized extractors cannot
 *           be found.
 */
@Override
public void unpackageClassifier(JarInputStream modelStream) throws IOException {
  // entry 1: a jar nested inside the jar, read through a second JarInputStream layered on the
  // same underlying stream
  JarStreams.getNextJarEntry(modelStream, DELEGATED_MODEL_FILE_NAME);
  JarInputStream nestedJarStream = new JarInputStream(modelStream);
  JarClassifierBuilder<?> nestedBuilder =
      JarClassifierBuilder.fromManifest(nestedJarStream.getManifest());
  this.delegatedClassifier = this.cast(nestedBuilder.loadClassifier(nestedJarStream));

  // entry 2: the serialized array of outcome feature extractors
  JarStreams.getNextJarEntry(modelStream, OUTCOME_FEATURE_EXTRACTOR_FILE_NAME);
  ObjectInputStream extractorStream = new ObjectInputStream(modelStream);
  Object serializedExtractors;
  try {
    serializedExtractors = extractorStream.readObject();
  } catch (ClassNotFoundException e) {
    throw new IOException(e);
  }
  this.outcomeFeatureExtractors = (OutcomeFeatureExtractor[]) serializedExtractors;
}
/**
 * Reads the superclass's entries, then advances to the entry named by {@code getModelName()}
 * and stores the result of {@code loadModel} on the stream in {@code model}.
 *
 * @param modelStream
 *          The jar stream to read the model from.
 * @throws IOException
 *           If the stream cannot be read.
 */
@Override
protected void unpackageClassifier(JarInputStream modelStream) throws IOException {
  // entries must be consumed in the order they were written: superclass data first
  super.unpackageClassifier(modelStream);

  // position the stream on the model entry, then read the model from that position
  JarStreams.getNextJarEntry(
      modelStream,
      this.getModelName());
  this.model = this.loadModel(modelStream);
}