/**
 * Test fixture: reads every {@code TokenSample} the AD token sample stream
 * yields for the bundled sample corpus and appends it to {@code samples}.
 *
 * <p>The stream is created by {@code ADTokenSampleStreamFactory} from the
 * classpath resources {@code opennlp/tools/formats/ad.sample} (data) and
 * {@code opennlp/tools/tokenize/latin-detokenizer.xml} (detokenizer
 * dictionary), with encoding {@code UTF-8} and language {@code por}.
 *
 * @throws IOException if a resource path cannot be canonicalized, or if
 *         reading from or closing the sample stream fails
 * @throws URISyntaxException if a resource URL cannot be converted to a URI
 */
@Before
public void setup() throws IOException, URISyntaxException {
  ADTokenSampleStreamFactory factory = new ADTokenSampleStreamFactory(
      ADTokenSampleStreamFactory.Parameters.class);

  File dict = new File(getClass().getClassLoader()
      .getResource("opennlp/tools/tokenize/latin-detokenizer.xml").toURI());
  File data = new File(getClass().getClassLoader()
      .getResource("opennlp/tools/formats/ad.sample").toURI());

  String[] args = { "-data", data.getCanonicalPath(), "-encoding", "UTF-8",
      "-lang", "por", "-detokenizer", dict.getCanonicalPath() };

  ObjectStream<TokenSample> tokenSampleStream = factory.create(args);
  try {
    // A null sample marks the end of the stream.
    TokenSample sample;
    while ((sample = tokenSampleStream.read()) != null) {
      samples.add(sample);
    }
  } finally {
    // Release the underlying resources even when reading fails; the
    // stream was previously left open after the fixture had been loaded.
    tokenSampleStream.close();
  }
}
/**
 * Creates a {@code TokenSample} stream from command-line style arguments.
 *
 * <p>The arguments are parsed into this factory's {@code Parameters}. The
 * subset accepted by {@code ADNameSampleStreamFactory.Parameters} is handed to
 * the {@code NameSample} factory registered under the format name
 * {@code "ad"}, and the resulting name samples are wrapped in a
 * {@code NameToTokenSampleStream} together with the detokenizer built from
 * the parsed parameters.
 *
 * @param args the raw argument array
 * @return a stream of token samples backed by the "ad" name sample stream
 */
public ObjectStream<TokenSample> create(String[] args) {
  final Parameters parsedParams = ArgumentParser.parse(args, Parameters.class);

  // Look up the name-sample factory first, then filter the arguments it
  // understands; the detokenizer is only built once the stream exists.
  final ObjectStream<NameSample> nameSamples = StreamFactoryRegistry
      .getFactory(NameSample.class, "ad")
      .create(ArgumentParser.filter(args,
          ADNameSampleStreamFactory.Parameters.class));

  return new NameToTokenSampleStream(createDetokenizer(parsedParams),
      nameSamples);
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Registers a new {@code ADTokenSampleStreamFactory} with the
 * {@code StreamFactoryRegistry} for {@code TokenSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  final ADTokenSampleStreamFactory adFactory =
      new ADTokenSampleStreamFactory(Parameters.class);

  StreamFactoryRegistry.registerFactory(TokenSample.class, "ad", adFactory);
}
/**
 * Registers a new {@code ADTokenSampleStreamFactory} with the
 * {@code StreamFactoryRegistry} for {@code TokenSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  final ADTokenSampleStreamFactory adFactory =
      new ADTokenSampleStreamFactory(Parameters.class);

  StreamFactoryRegistry.registerFactory(TokenSample.class, "ad", adFactory);
}
/**
 * Builds an {@code ADTokenSampleStreamFactory} and requests a
 * {@code TokenSample} stream from it using a fixed argument list: encoding
 * {@code ISO-8859-1}, language {@code pt}, and the canonical paths of
 * {@code data} and {@code dict}.
 *
 * <p>NOTE(review): {@code data} and {@code dict} are not declared in the
 * visible portion of this method, and the body is cut off right after the
 * stream is created, so how the stream is consumed cannot be told from here.
 *
 * @param a command-line arguments; not read in the visible portion of the body
 * @throws Exception declared broadly; the visible code calls
 *         {@code getCanonicalPath()}, which can fail with an I/O error
 */
public static void main(String[] a) throws Exception { ADTokenSampleStreamFactory factory = new ADTokenSampleStreamFactory( ADTokenSampleStreamFactory.Parameters.class); String[] args = { "-data", data.getCanonicalPath(), "-encoding", "ISO-8859-1", "-lang", "pt", "-detokenizer", dict.getCanonicalPath() }; ObjectStream<TokenSample> tokenSampleStream = factory.create(args);
/**
 * Registers a new {@code ADTokenSampleStreamFactory} with the
 * {@code StreamFactoryRegistry} for {@code TokenSample} under the format
 * name {@code "ad"}.
 */
public static void registerFactory() {
  final ADTokenSampleStreamFactory adFactory =
      new ADTokenSampleStreamFactory(Parameters.class);

  StreamFactoryRegistry.registerFactory(TokenSample.class, "ad", adFactory);
}
/**
 * Creates a {@code TokenSample} stream from command-line style arguments.
 *
 * <p>The arguments are parsed into this factory's {@code Parameters}. The
 * subset accepted by {@code ADNameSampleStreamFactory.Parameters} is handed to
 * the {@code NameSample} factory registered under the format name
 * {@code "ad"}, and the resulting name samples are wrapped in a
 * {@code NameToTokenSampleStream} together with the detokenizer built from
 * the parsed parameters.
 *
 * @param args the raw argument array
 * @return a stream of token samples backed by the "ad" name sample stream
 */
public ObjectStream<TokenSample> create(String[] args) {
  final Parameters parsedParams = ArgumentParser.parse(args, Parameters.class);

  // Look up the name-sample factory first, then filter the arguments it
  // understands; the detokenizer is only built once the stream exists.
  final ObjectStream<NameSample> nameSamples = StreamFactoryRegistry
      .getFactory(NameSample.class, "ad")
      .create(ArgumentParser.filter(args,
          ADNameSampleStreamFactory.Parameters.class));

  return new NameToTokenSampleStream(createDetokenizer(parsedParams),
      nameSamples);
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Builds an {@code ADTokenSampleStreamFactory} and requests a
 * {@code TokenSample} stream from it using a fixed argument list: encoding
 * {@code ISO-8859-1}, language {@code pt}, and the canonical paths of
 * {@code data} and {@code dict}.
 *
 * <p>NOTE(review): {@code data} and {@code dict} are not declared in the
 * visible portion of this method, and the body is cut off right after the
 * stream is created, so how the stream is consumed cannot be told from here.
 *
 * @param a command-line arguments; not read in the visible portion of the body
 * @throws Exception declared broadly; the visible code calls
 *         {@code getCanonicalPath()}, which can fail with an I/O error
 */
public static void main(String[] a) throws Exception { ADTokenSampleStreamFactory factory = new ADTokenSampleStreamFactory( ADTokenSampleStreamFactory.Parameters.class); String[] args = { "-data", data.getCanonicalPath(), "-encoding", "ISO-8859-1", "-lang", "pt", "-detokenizer", dict.getCanonicalPath() }; ObjectStream<TokenSample> tokenSampleStream = factory.create(args);
/**
 * Creates a {@code TokenSample} stream from command-line style arguments.
 *
 * <p>The arguments are parsed into this factory's {@code Parameters}. The
 * subset accepted by {@code ADNameSampleStreamFactory.Parameters} is handed to
 * the {@code NameSample} factory registered under the format name
 * {@code "ad"}, and the resulting name samples are wrapped in a
 * {@code NameToTokenSampleStream} together with the detokenizer built from
 * the parsed parameters.
 *
 * @param args the raw argument array
 * @return a stream of token samples backed by the "ad" name sample stream
 */
public ObjectStream<TokenSample> create(String[] args) {
  final Parameters parsedParams = ArgumentParser.parse(args, Parameters.class);

  // Look up the name-sample factory first, then filter the arguments it
  // understands; the detokenizer is only built once the stream exists.
  final ObjectStream<NameSample> nameSamples = StreamFactoryRegistry
      .getFactory(NameSample.class, "ad")
      .create(ArgumentParser.filter(args,
          ADNameSampleStreamFactory.Parameters.class));

  return new NameToTokenSampleStream(createDetokenizer(parsedParams),
      nameSamples);
}
} // closes an enclosing scope whose opening lies outside this view