/**
 * Opens a {@link Conll02NameSampleStream} over a classpath test resource.
 *
 * @param lang the language handed to the sample stream
 * @param name the resource file name, appended to {@code /opennlp/tools/formats/}
 * @return a sample stream created with
 *     {@code Conll02NameSampleStream.GENERATE_PERSON_ENTITIES}; the caller is
 *     responsible for closing it
 * @throws IOException declared by the original signature for stream setup failures
 */
private static ObjectStream<NameSample> openData(LANGUAGE lang, String name) throws IOException {
  // Resolve the resource relative to the test class, under the shared formats folder.
  String resourcePath = "/opennlp/tools/formats/" + name;
  InputStreamFactory resourceFactory =
      new ResourceAsStreamFactory(Conll02NameSampleStreamTest.class, resourcePath);

  return new Conll02NameSampleStream(
      lang, resourceFactory, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);
}
return new Conll02NameSampleStream(lang, CmdLineUtil.createInputStreamFactory(params.getData()), typesToGenerate); } catch (IOException e) {
/**
 * Trains a name finder model from the samples in {@code trainFile}.
 *
 * <p>The file is read through a {@link Conll02NameSampleStream}, which is closed
 * once training has finished (or failed) so the underlying file handle is not leaked.
 *
 * @param trainFile the training data file
 * @param lang the language passed to the sample stream; its lower-cased
 *     {@code toString()} is also used as the language code for training
 * @param params the training parameters forwarded to {@code NameFinderME.train}
 * @param types the entity-type flags forwarded to the sample stream, e.g.
 *     {@code Conll02NameSampleStream.GENERATE_PERSON_ENTITIES}
 * @return the trained model
 * @throws IOException if reading the training data, training, or closing the stream fails
 */
private TokenNameFinderModel train(File trainFile, LANGUAGE lang,
    TrainingParameters params, int types) throws IOException {
  try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
      lang, new MarkableFileInputStreamFactory(trainFile), types)) {
    // Locale.ROOT keeps the language code stable regardless of the JVM's default
    // locale; fully qualified so no new import is required.
    String languageCode = lang.toString().toLowerCase(java.util.Locale.ROOT);

    // The second argument is passed as null, exactly as before.
    return NameFinderME.train(languageCode, null, samples, params,
        new TokenNameFinderFactory());
  }
}
spanishTestBFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.testb"); verifyTrainingData(new Conll02NameSampleStream( LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTrainingFile), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES), new BigInteger("109687424525847313767541246922170457976")); verifyTrainingData(new Conll02NameSampleStream( LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestAFile), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES), new BigInteger("12942966701628852910737840182656846323")); verifyTrainingData(new Conll02NameSampleStream( LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestBFile), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES), new BigInteger("223206987942490952427646331013509976957")); verifyTrainingData(new Conll02NameSampleStream( LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTrainingFile), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES), new BigInteger("226089384066775461905386060946810714487")); verifyTrainingData(new Conll02NameSampleStream( LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestAFile), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES), new BigInteger("313879596837181728494732341737647284762")); verifyTrainingData(new Conll02NameSampleStream( LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestBFile), Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
/**
 * Evaluates {@code model} against the samples in {@code testData} and asserts the
 * resulting F-measure.
 *
 * <p>The sample stream is closed as soon as evaluation completes (or fails), so the
 * test data file handle is released before the assertion runs.
 *
 * @param model the model wrapped in a {@code NameFinderME} for evaluation
 * @param testData the evaluation data file
 * @param lang the language passed to the sample stream
 * @param types the entity-type flags forwarded to the sample stream
 * @param expectedFMeasure the expected F-measure, compared with a tolerance of 0.0001
 * @throws IOException if reading the test data or closing the stream fails
 */
private void eval(TokenNameFinderModel model, File testData, LANGUAGE lang,
    int types, double expectedFMeasure) throws IOException {
  TokenNameFinderEvaluator evaluator;

  try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
      lang, new MarkableFileInputStreamFactory(testData), types)) {
    evaluator = new TokenNameFinderEvaluator(new NameFinderME(model));
    evaluator.evaluate(samples);
  }

  Assert.assertEquals(expectedFMeasure, evaluator.getFMeasure().getFMeasure(), 0.0001);
}
return new Conll02NameSampleStream(lang, CmdLineUtil.createInputStreamFactory(params.getData()), typesToGenerate); } catch (IOException e) {
return new Conll02NameSampleStream(lang, CmdLineUtil.createInputStreamFactory(params.getData()), typesToGenerate); } catch (IOException e) {