public ObjectStream<Parse> create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream<File> documentStream = new DirectorySampleStream(new File( params.getOntoNotesDir()), file -> { if (file.isFile()) { return file.getName().endsWith(".parse"); } return file.isDirectory(); }, true); // We need file to line here ... and that is probably best doen with the plain text stream // lets copy it over here, refactor it, and then at some point we replace the current version // with the refactored version return new OntoNotesParseSampleStream(new DocumentToLineStream(new FileToStringSampleStream( documentStream, StandardCharsets.UTF_8))); }
/**
 * Builds a {@link Parse} sample stream over the {@code .parse} files found below
 * {@code ontonotes4/data/files/data/english} in the OpenNLP data directory.
 *
 * @return a parse sample stream layered over the selected files
 * @throws IOException declared by the signature; the raising call is not visible here
 */
private static ObjectStream<Parse> createParseSampleStream() throws IOException {
  File englishDir = new File(getOpennlpDataDir(), "ontonotes4/data/files/data/english");

  // NOTE(review): the trailing 'true' presumably turns on recursive traversal - confirm.
  ObjectStream<File> parseFiles = new DirectorySampleStream(
      englishDir,
      entry -> {
        // Anything that is not a regular file is kept only if it is a directory.
        if (!entry.isFile()) {
          return entry.isDirectory();
        }
        // Regular files must carry the ".parse" suffix.
        return entry.getName().endsWith(".parse");
      },
      true);

  FileToStringSampleStream fileContents =
      new FileToStringSampleStream(parseFiles, StandardCharsets.UTF_8);
  DocumentToLineStream lines = new DocumentToLineStream(fileContents);

  return new OntoNotesParseSampleStream(lines);
}
/**
 * Builds a {@link POSSample} stream by wrapping an OntoNotes parse stream, which is
 * read from the {@code .parse} files found below
 * {@code ontonotes4/data/files/data/english} in the OpenNLP data directory.
 *
 * @return a POS sample stream layered over the parse stream
 * @throws IOException declared by the signature; the raising call is not visible here
 */
private static ObjectStream<POSSample> createPOSSampleStream() throws IOException {
  File englishDir = new File(getOpennlpDataDir(), "ontonotes4/data/files/data/english");

  // Regular files are accepted only with a ".parse" suffix; anything that is not a
  // regular file is accepted only if it is a directory.
  // NOTE(review): the trailing 'true' presumably turns on recursive traversal - confirm.
  ObjectStream<File> parseFiles = new DirectorySampleStream(
      englishDir,
      entry -> entry.isFile()
          ? entry.getName().endsWith(".parse")
          : entry.isDirectory(),
      true);

  FileToStringSampleStream fileContents =
      new FileToStringSampleStream(parseFiles, StandardCharsets.UTF_8);
  DocumentToLineStream lines = new DocumentToLineStream(fileContents);
  OntoNotesParseSampleStream parses = new OntoNotesParseSampleStream(lines);

  return new ParseToPOSSampleStream(parses);
}
public ObjectStream<Parse> create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream<File> documentStream = new DirectorySampleStream(new File( params.getOntoNotesDir()), file -> { if (file.isFile()) { return file.getName().endsWith(".parse"); } return file.isDirectory(); }, true); // We need file to line here ... and that is probably best doen with the plain text stream // lets copy it over here, refactor it, and then at some point we replace the current version // with the refactored version return new OntoNotesParseSampleStream(new DocumentToLineStream(new FileToStringSampleStream( documentStream, StandardCharsets.UTF_8))); }
public ObjectStream<Parse> create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream<File> documentStream = new DirectorySampleStream(new File( params.getOntoNotesDir()), file -> { if (file.isFile()) { return file.getName().endsWith(".parse"); } return file.isDirectory(); }, true); // We need file to line here ... and that is probably best doen with the plain text stream // lets copy it over here, refactor it, and then at some point we replace the current version // with the refactored version return new OntoNotesParseSampleStream(new DocumentToLineStream(new FileToStringSampleStream( documentStream, StandardCharsets.UTF_8))); }