/**
 * Builds a {@code ChunkSampleStream} over the data file named by the command line arguments.
 *
 * @param args command line arguments, parsed into {@code Parameters}
 * @return a {@code ChunkSampleStream} wrapping a {@code PlainTextByLineStream} over the data file
 */
public ObjectStream<ChunkSample> create(String[] args) {
  final Parameters params = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", params.getData());
  final InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(params.getData());

  ObjectStream<String> lines;
  try {
    lines = new PlainTextByLineStream(dataFactory, params.getEncoding());
  } catch (IOException ioe) {
    // NOTE(review): the handler presumably terminates by throwing; if it ever
    // returns, the sample stream is built over a null line stream.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
    lines = null;
  }

  return new ChunkSampleStream(lines);
}
}
super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); CmdLineUtil.checkOutputFile("language detector model", modelOutFile); CmdLineUtil.writeModel("language detector", modelOutFile, model);
CmdLineUtil.handleStdinIoError(e);
/**
 * Loads a model from the given file and reports progress and elapsed time
 * on {@code System.err}.
 *
 * @param modelFile the file the model is read from
 * @return the model returned by {@code loadModel}
 * @throws TerminateToolException if the model has an invalid format or an
 *         I/O error occurs while reading the file
 */
public T load(File modelFile) {
  final long startMillis = System.currentTimeMillis();

  CmdLineUtil.checkInputFile(modelName + " model", modelFile);
  System.err.print("Loading " + modelName + " model ... ");

  final T loaded;
  try (InputStream in = new BufferedInputStream(
      CmdLineUtil.openInFile(modelFile), CmdLineUtil.IO_BUFFER_SIZE)) {
    loaded = loadModel(in);
  } catch (InvalidFormatException invalidFormat) {
    System.err.println("failed");
    throw new TerminateToolException(-1, "Model has invalid format", invalidFormat);
  } catch (IOException ioFailure) {
    System.err.println("failed");
    throw new TerminateToolException(-1,
        "IO error while loading model file '" + modelFile + "'", ioFailure);
  }

  final long elapsedMillis = System.currentTimeMillis() - startMillis;
  System.err.printf("done (%.3fs)\n", elapsedMillis / 1000d);

  return loaded;
}
}
public ObjectStream<POSSample> create(String[] args) { Parameters params = ArgumentParser.parse(args, Parameters.class); InputStreamFactory inFactory = CmdLineUtil.createInputStreamFactory(params.getData()); try { System.setOut(new PrintStream(System.out, true, "UTF-8")); return new ConllXPOSSampleStream(inFactory, StandardCharsets.UTF_8); } catch (UnsupportedEncodingException e) { // this shouldn't happen throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + e.getMessage(), e); } catch (IOException e) { // That will throw an exception CmdLineUtil.handleCreateObjectStreamError(e); return null; } } }
/**
 * Builds a {@code FeatureSampleStream} over the data file named by the
 * command line arguments.
 *
 * @param args command line arguments, parsed into {@code Parameters}
 * @return a {@code FeatureSampleStream} wrapping a {@code PlainTextByLineStream}
 *         over the data file
 */
public ObjectStream<FeatureSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", params.getData());

  // The data is read only through the InputStreamFactory handed to
  // PlainTextByLineStream. Do not open an additional FileInputStream here:
  // nothing would consume or close it, so the file handle would leak.
  ObjectStream<String> lineStream = null;
  try {
    lineStream = new PlainTextByLineStream(
        CmdLineUtil.createInputStreamFactory(params.getData()), params.getEncoding());
  } catch (IOException e) {
    CmdLineUtil.handleCreateObjectStreamError(e);
  }

  return new FeatureSampleStream(lineStream);
}
}
/**
 * Builds an {@code IrishSentenceBankTokenSampleStream} from the Irish
 * Sentence Bank document named by the command line arguments.
 *
 * @param args command line arguments, parsed into {@code Parameters}
 * @return a token sample stream over the parsed document
 */
public ObjectStream<TokenSample> create(String[] args) {
  final Parameters params = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", params.getData());

  IrishSentenceBankDocument document;
  try {
    document = IrishSentenceBankDocument.parse(params.getData());
  } catch (IOException ioe) {
    // NOTE(review): the handler presumably throws; if it returns, the stream
    // is built over a null document.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
    document = null;
  }

  return new IrishSentenceBankTokenSampleStream(document);
}
}
File dictOutFile = new File(params.getDict()); CmdLineUtil.checkInputFile("Name data", testData); CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile); InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(testData);
/**
 * Reads a one-entry-per-line dictionary from the input file and serializes
 * it to the output file.
 *
 * @param args command line arguments, validated and parsed into {@code Params}
 * @throws TerminateToolException if an I/O error occurs while reading or writing
 */
public void run(String[] args) {
  final Params params = validateAndParseParams(args, Params.class);

  final File source = params.getInputFile();
  final File target = params.getOutputFile();
  final Charset charset = params.getEncoding();

  CmdLineUtil.checkInputFile("dictionary input file", source);
  CmdLineUtil.checkOutputFile("dictionary output file", target);

  try (InputStreamReader reader = new InputStreamReader(new FileInputStream(source), charset);
       OutputStream sink = new FileOutputStream(target)) {
    final Dictionary parsed = Dictionary.parseOneEntryPerLine(reader);
    parsed.serialize(sink);
  } catch (IOException ioFailure) {
    throw new TerminateToolException(-1,
        "IO error while reading training data or indexing data: " + ioFailure.getMessage(),
        ioFailure);
  }
}
}
public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); OutputStream reportOutputStream = null; if (reportFile != null) { CmdLineUtil.checkOutputFile("Report Output File", reportFile); try { reportOutputStream = new FileOutputStream(reportFile);
/**
 * Replaces the tagger model inside a parser model. Expects exactly two
 * arguments: the parser model file and the POS tagger model file. The
 * updated parser model is written back to the parser model file. With any
 * other argument count, the help text is printed to {@code System.out}.
 *
 * @param args command line arguments
 */
public void run(String[] args) {
  if (args.length != 2) {
    System.out.println(getHelp());
    return;
  }

  final File parserModelFile = new File(args[0]);
  final ParserModel originalParser = new ParserModelLoader().load(parserModelFile);

  final File taggerModelFile = new File(args[1]);
  final POSModel replacementTagger = new POSModelLoader().load(taggerModelFile);

  final ParserModel patchedParser = originalParser.updateTaggerModel(replacementTagger);

  // The parser model input file is also the output target.
  CmdLineUtil.writeModel("parser", parserModelFile, patchedParser);
}
}
/**
 * Loads a dictionary from the given file.
 *
 * @param f the dictionary file, or {@code null} if no dictionary was specified
 * @return the dictionary read from {@code f}, or {@code null} when {@code f} is {@code null}
 * @throws IOException if the file cannot be opened or read
 */
static Dictionary loadDict(File f) throws IOException {
  if (f == null) {
    return null;
  }

  CmdLineUtil.checkInputFile("abb dict", f);

  // Close the file stream once the dictionary has been constructed; the
  // stream was previously left open, leaking a file handle.
  // NOTE(review): assumes the Dictionary constructor consumes the stream
  // eagerly - confirm against the Dictionary API.
  try (FileInputStream in = new FileInputStream(f)) {
    return new Dictionary(in);
  }
}
/**
 * Serializes a {@link BaseModel} to a file, printing progress, timing and the
 * resulting path to {@code System.err}.
 *
 * @param modelName type of the model; used in progress and error messages
 * @param modelFile the file the model is written to
 * @param model the model to serialize
 * @throws TerminateToolException if an I/O error occurs while writing the file
 */
public static void writeModel(String modelName, File modelFile, BaseModel model) {
  CmdLineUtil.checkOutputFile(modelName + " model", modelFile);

  System.err.print("Writing " + modelName + " model ... ");
  final long startMillis = System.currentTimeMillis();

  try (OutputStream sink =
           new BufferedOutputStream(new FileOutputStream(modelFile), IO_BUFFER_SIZE)) {
    model.serialize(sink);
  } catch (IOException ioFailure) {
    System.err.println("failed");
    throw new TerminateToolException(-1,
        "Error during writing model file '" + modelFile + "'", ioFailure);
  }

  final long elapsedMillis = System.currentTimeMillis() - startMillis;
  System.err.printf("done (%.3fs)\n", elapsedMillis / 1000d);

  System.err.println();
  System.err.println("Wrote " + modelName + " model to");
  System.err.println("path: " + modelFile.getAbsolutePath());
  System.err.println();
}
public static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) { byte[] featureGeneratorBytes = null; // load descriptor file into memory if (featureGenDescriptorFile != null) { try (InputStream bytesIn = CmdLineUtil.openInFile(featureGenDescriptorFile)) { featureGeneratorBytes = ModelUtil.read(bytesIn); } catch (IOException e) { throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage(), e); } } return featureGeneratorBytes; }
Integer beamSize = CmdLineUtil.getIntParameter("-bs", args); if (beamSize == null) { beamSize = AbstractBottomUpParser.defaultBeamSize; Integer numParses = CmdLineUtil.getIntParameter("-k", args); boolean showTopK; if (numParses == null) { Double advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args); String tokenizerModelName = CmdLineUtil.getParameter( "-tk", args ); if (tokenizerModelName != null ) { TokenizerModel tokenizerModel = new TokenizerModelLoader().load(new File(tokenizerModelName)); CmdLineUtil.handleStdinIoError(e);
public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters();
/**
 * Builds a {@code FeatureSampleStream} over the data file named by the
 * command line arguments.
 *
 * @param args command line arguments, parsed into {@code Parameters}
 * @return a {@code FeatureSampleStream} wrapping a {@code PlainTextByLineStream}
 *         over the data file
 */
public ObjectStream<FeatureSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", params.getData());

  // The data is read only through the InputStreamFactory handed to
  // PlainTextByLineStream. Do not open an additional FileInputStream here:
  // nothing would consume or close it, so the file handle would leak.
  ObjectStream<String> lineStream = null;
  try {
    lineStream = new PlainTextByLineStream(
        CmdLineUtil.createInputStreamFactory(params.getData()), params.getEncoding());
  } catch (IOException e) {
    CmdLineUtil.handleCreateObjectStreamError(e);
  }

  return new FeatureSampleStream(lineStream);
}
}
/**
 * Opens a sample data file as a {@code WSDSampleStream}.
 *
 * @param sampleDataName name of the data set; used as a prefix in the input file check
 * @param sampleDataFile the file containing the sample data
 * @param encoding the character encoding passed to the line stream
 * @return a {@code WSDSampleStream} over the lines of the file
 */
static ObjectStream<WSDSample> openSampleData(String sampleDataName,
    File sampleDataFile, Charset encoding) {
  final String description = sampleDataName + " Data";
  CmdLineUtil.checkInputFile(description, sampleDataFile);

  final FileInputStream fileIn = CmdLineUtil.openInFile(sampleDataFile);

  // The line stream is created over the file's channel rather than the stream itself.
  final ObjectStream<String> lines = new PlainTextByLineStream(fileIn.getChannel(), encoding);

  return new WSDSampleStream(lines);
}
}
@Override public ObjectStream<TokenSample> create(String[] args) { Parameters params = ArgumentParser.parse(args, Parameters.class); InputStreamFactory inFactory = CmdLineUtil.createInputStreamFactory(params.getData()); try { return new ConlluTokenSampleStream(new ConlluStream(inFactory)); } catch (IOException e) { // That will throw an exception CmdLineUtil.handleCreateObjectStreamError(e); } return null; } }
/**
 * Builds an {@code IrishSentenceBankSentenceStream} from the Irish Sentence
 * Bank document named by the command line arguments.
 *
 * @param args command line arguments, parsed into {@code Parameters}
 * @return a sentence sample stream over the parsed document
 */
@Override
public ObjectStream<SentenceSample> create(String[] args) {
  final Parameters params = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", params.getData());

  IrishSentenceBankDocument document;
  try {
    document = IrishSentenceBankDocument.parse(params.getData());
  } catch (IOException ioe) {
    // NOTE(review): the handler presumably throws; if it returns, the stream
    // is built over a null document.
    CmdLineUtil.handleCreateObjectStreamError(ioe);
    document = null;
  }

  return new IrishSentenceBankSentenceStream(document);
}
}