/**
 * Builds a {@link LanguageDetectorSampleStream} over the classpath resource
 * {@code /opennlp/tools/doccat/DoccatSample.txt}, read line by line as UTF-8.
 *
 * <p>The resource is resolved relative to {@code LanguageDetectorMETest.class}.
 * The returned stream is handed to the caller as-is; nothing in this method closes it.
 *
 * @return a sample stream wrapping the line stream of the resource
 * @throws IOException declared by this method; the only checked-exception source
 *         visible here is the construction of the line stream
 */
public static LanguageDetectorSampleStream createSampleStream() throws IOException {
  final PlainTextByLineStream sampleLines = new PlainTextByLineStream(
      new ResourceAsStreamFactory(
          LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt"),
      "UTF-8");

  return new LanguageDetectorSampleStream(sampleLines);
}
}
/**
 * Creates the language sample stream described by the command line arguments.
 *
 * <p>The arguments are parsed into {@code Parameters}; the data file they name is
 * checked with {@code CmdLineUtil.checkInputFile} and then opened as a line stream
 * using the encoding from the parameters.
 *
 * @param args the raw command line arguments to parse
 * @return a {@link LanguageDetectorSampleStream} wrapping the line stream of the data file
 */
public ObjectStream<LanguageSample> create(String[] args) {
  Parameters parsed = ArgumentParser.parse(args, Parameters.class);

  CmdLineUtil.checkInputFile("Data", parsed.getData());
  InputStreamFactory dataFactory = CmdLineUtil.createInputStreamFactory(parsed.getData());

  ObjectStream<String> lines = null;
  try {
    lines = new PlainTextByLineStream(dataFactory, parsed.getEncoding());
  } catch (IOException e) {
    // Failure handling is delegated to CmdLineUtil. If that call returns normally,
    // 'lines' is still null when the sample stream is built below.
    CmdLineUtil.handleCreateObjectStreamError(e);
  }

  return new LanguageDetectorSampleStream(lines);
}
}
/**
 * Trains the language detector model and stores it in the {@code model} field.
 *
 * <p>The training data is the classpath resource
 * {@code /opennlp/tools/doccat/DoccatSample.txt}, read line by line as UTF-8.
 * Training uses 100 iterations, a cutoff of 5 and the {@code NAIVEBAYES} algorithm,
 * with a {@code DummyFactory} as the factory argument.
 *
 * <p>The sample stream is opened in a try-with-resources block so that it is closed
 * once training finishes, whether training succeeds or throws.
 *
 * @throws Exception if opening the resource, training, or closing the stream fails
 */
@BeforeClass
public static void train() throws Exception {
  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(
      LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");

  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, "100");
  params.put(TrainingParameters.CUTOFF_PARAM, "5");
  params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

  // Closing the sample stream releases the underlying resource stream, which the
  // previous version left open after training.
  try (LanguageDetectorSampleStream sampleStream = new LanguageDetectorSampleStream(
      new PlainTextByLineStream(streamFactory, "UTF-8"))) {
    model = LanguageDetectorME.train(sampleStream, params, new DummyFactory());
  }
}
/**
 * Turns command line arguments into a stream of language samples.
 *
 * <p>Steps, in order: parse {@code args} into {@code Parameters}, check the data
 * file via {@code CmdLineUtil.checkInputFile}, obtain an input stream factory for it,
 * open a line stream with the configured encoding, and wrap that in a
 * {@link LanguageDetectorSampleStream}.
 *
 * @param args the command line arguments to parse
 * @return the sample stream built on top of the data file's line stream
 */
public ObjectStream<LanguageSample> create(String[] args) {
  Parameters options = ArgumentParser.parse(args, Parameters.class);
  CmdLineUtil.checkInputFile("Data", options.getData());

  InputStreamFactory inputFactory = CmdLineUtil.createInputStreamFactory(options.getData());

  ObjectStream<String> textLines;
  try {
    textLines = new PlainTextByLineStream(inputFactory, options.getEncoding());
  } catch (IOException ioError) {
    CmdLineUtil.handleCreateObjectStreamError(ioError);
    // Reached only if the handler above returns normally; the sample stream
    // then wraps null, as it would with a null-initialised variable.
    textLines = null;
  }

  return new LanguageDetectorSampleStream(textLines);
}
}
/**
 * Builds an {@code ObjectStream} of {@code LanguageSample}s from command line arguments.
 *
 * <p>The data file named by the parsed {@code Parameters} is checked, opened as a
 * line stream in the encoding given by the parameters, and wrapped in a
 * {@link LanguageDetectorSampleStream}.
 *
 * @param args the command line arguments, parsed with {@code ArgumentParser}
 * @return a sample stream reading from the configured data file
 */
public ObjectStream<LanguageSample> create(String[] args) {
  final Parameters cliParams = ArgumentParser.parse(args, Parameters.class);

  // Check the data file before creating a stream factory for it.
  CmdLineUtil.checkInputFile("Data", cliParams.getData());
  final InputStreamFactory dataIn = CmdLineUtil.createInputStreamFactory(cliParams.getData());

  ObjectStream<String> rawLines = null;
  try {
    rawLines = new PlainTextByLineStream(dataIn, cliParams.getEncoding());
  } catch (IOException cause) {
    // Error reporting is left to the shared command line helper.
    CmdLineUtil.handleCreateObjectStreamError(cause);
  }

  return new LanguageDetectorSampleStream(rawLines);
}
}