/**
 * Creates a stream over {@code samples} and runs the initial skip.
 *
 * @param samples the underlying stream that is stored in this instance
 * @param samplesToSkip the skip count that is stored in this instance
 * @throws IOException if {@code skipSamples()} fails
 */
SampleSkipStream(ObjectStream<T> samples, int samplesToSkip) throws IOException {
  this.samplesToSkip = samplesToSkip;
  this.samples = samples;

  // Both fields are assigned before this call. skipSamples() is defined
  // elsewhere; presumably it discards the first samplesToSkip samples.
  skipSamples();
}
/**
 * Builds the sample stream described by the command line arguments.
 *
 * <p>A {@link LeipzigLanguageSampleStream} is created over the sentences
 * directory, wrapped in a {@code SampleShuffleStream} and then in a
 * {@code SampleSkipStream}. The per-language sample count requested from the
 * Leipzig stream is {@code samplesPerLanguage + samplesToSkip}.
 *
 * @param args the raw command line arguments, parsed into {@code Parameters}
 * @return the skip stream wrapping the shuffled Leipzig samples
 * @throws NumberFormatException if one of the numeric arguments is not a valid integer
 * @throws TerminateToolException if an {@link IOException} occurs while building the streams
 */
public ObjectStream<LanguageSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  File sentencesFileDir = params.getSentencesDir();

  // Parse every numeric argument exactly once; samplesToSkip is needed twice below.
  int sentencesPerSample = Integer.parseInt(params.getSentencesPerSample());
  int samplesPerLanguage = Integer.parseInt(params.getSamplesPerLanguage());
  int samplesToSkip = Integer.parseInt(params.getSamplesToSkip());

  try {
    LeipzigLanguageSampleStream leipzigSamples = new LeipzigLanguageSampleStream(
        sentencesFileDir, sentencesPerSample, samplesPerLanguage + samplesToSkip);

    // Diamond operators instead of raw types, so the element type is checked by the compiler.
    return new SampleSkipStream<>(new SampleShuffleStream<>(leipzigSamples), samplesToSkip);
  } catch (IOException e) {
    throw new TerminateToolException(-1, "IO error while opening sample data.", e);
  }
}
} // closes the enclosing class, whose header lies outside this block
/**
 * Registers a {@link LeipzigLanguageSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@link LanguageSample} under the name
 * {@code "leipzig"}.
 */
public static void registerFactory() {
  LeipzigLanguageSampleStreamFactory leipzigFactory =
      new LeipzigLanguageSampleStreamFactory(Parameters.class);

  StreamFactoryRegistry.registerFactory(LanguageSample.class, "leipzig", leipzigFactory);
}
/**
 * Returns the next sample, moving on to the next sentences file whenever the
 * current per-file stream is exhausted.
 *
 * @return the next {@link LanguageSample}, or {@code null} once the current
 *     stream yields nothing and no sentences file is left
 * @throws IOException if reading a sample or opening the next file fails
 */
public LanguageSample read() throws IOException {
  while (true) {
    // Serve from the currently open per-file stream first.
    if (sampleStream != null) {
      LanguageSample next = sampleStream.read();
      if (next != null) {
        return next;
      }
    }

    if (!sentencesFilesIt.hasNext()) {
      return null;
    }

    // The first three characters of the file name are used as the language code.
    File nextFile = sentencesFilesIt.next();
    String lang = nextFile.getName().substring(0, 3);
    sampleStream = new LeipzigSentencesStream(lang, nextFile, sentencesPerSample,
        langSampleCounts.get(lang));
    // Loop again to read from the freshly opened stream.
  }
}
/**
 * Expects an {@link InvalidFormatException} while building or draining a
 * stream that asks for two samples of two sentences each per language.
 * Presumably the test data does not hold enough sentences for that request.
 */
@Test(expected = InvalidFormatException.class)
public void testNotEnoughSentences() throws IOException {
  final int sentencesPerSample = 2;
  final int samplesPerLanguage = 2;

  File dataDir = new File(testDataPath);
  LeipzigLanguageSampleStream stream =
      new LeipzigLanguageSampleStream(dataDir, sentencesPerSample, samplesPerLanguage);

  // Drain the stream completely; the expected exception may be raised anywhere in this body.
  for (;;) {
    if (stream.read() == null) {
      break;
    }
  }
}
/**
 * Reads {@code samples} until it returns {@code null}, buffers every sample,
 * shuffles the buffer with a fixed seed and then calls {@code reset()}.
 *
 * @param samples the stream whose content is buffered
 * @throws IOException if reading from {@code samples} or {@code reset()} fails
 */
SampleShuffleStream(ObjectStream<T> samples) throws IOException {
  for (T next = samples.read(); next != null; next = samples.read()) {
    bufferedSamples.add(next);
  }

  // Fixed seed: the same input always yields the same shuffled order.
  Random shuffleRandom = new Random(23);
  Collections.shuffle(bufferedSamples, shuffleRandom);

  reset();
}
/**
 * Creates a sample stream over the sentences files found in a folder.
 *
 * <p>A file is picked up when it is a non-hidden regular file whose name has
 * at least three characters and starts with three lower case letters
 * {@code a-z}. Those three letters are used as the language key. The accepted
 * files are sorted, and for every language the number of samples taken from
 * each of its files is {@code samplesPerLanguage} divided (integer division)
 * by the number of files of that language.
 *
 * @param leipzigFolder the folder that is scanned for sentences files
 * @param sentencesPerSample the value stored for use when per-file streams are created
 * @param samplesPerLanguage the number of samples per language, split evenly across
 *     that language's files
 * @throws IOException if the folder cannot be listed (for example because it is not a
 *     directory), or if {@code reset()} fails
 */
public LeipzigLanguageSampleStream(File leipzigFolder, final int sentencesPerSample,
    final int samplesPerLanguage) throws IOException {
  this.sentencesPerSample = sentencesPerSample;

  File[] listedFiles = leipzigFolder.listFiles((File pathname) ->
      !pathname.isHidden()
          && pathname.isFile()
          && pathname.getName().length() >= 3
          && pathname.getName().substring(0, 3).matches("[a-z]+"));

  // listFiles returns null when the path is not a directory or an I/O error occurs;
  // report that as an IOException instead of failing with a NullPointerException in sort.
  if (listedFiles == null) {
    throw new IOException("Failed to list sentences files in folder: " + leipzigFolder);
  }

  Arrays.sort(listedFiles);
  sentencesFiles = listedFiles;

  // Number of files per language key (first three characters of the file name).
  Map<String, Integer> langCounts = Arrays.stream(sentencesFiles)
      .map(file -> file.getName().substring(0, 3))
      .collect(Collectors.groupingBy(String::toString, Collectors.summingInt(v -> 1)));

  // Samples to take from each file of a language.
  langSampleCounts = langCounts.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, e -> samplesPerLanguage / e.getValue()));

  random = new Random(23);

  reset();
}
/**
 * Builds the sample stream described by the command line arguments.
 *
 * <p>A {@link LeipzigLanguageSampleStream} is created over the sentences
 * directory, wrapped in a {@code SampleShuffleStream} and then in a
 * {@code SampleSkipStream}. The per-language sample count requested from the
 * Leipzig stream is {@code samplesPerLanguage + samplesToSkip}.
 *
 * @param args the raw command line arguments, parsed into {@code Parameters}
 * @return the skip stream wrapping the shuffled Leipzig samples
 * @throws NumberFormatException if one of the numeric arguments is not a valid integer
 * @throws TerminateToolException if an {@link IOException} occurs while building the streams
 */
public ObjectStream<LanguageSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  File sentencesFileDir = params.getSentencesDir();

  // Parse every numeric argument exactly once; samplesToSkip is needed twice below.
  int sentencesPerSample = Integer.parseInt(params.getSentencesPerSample());
  int samplesPerLanguage = Integer.parseInt(params.getSamplesPerLanguage());
  int samplesToSkip = Integer.parseInt(params.getSamplesToSkip());

  try {
    LeipzigLanguageSampleStream leipzigSamples = new LeipzigLanguageSampleStream(
        sentencesFileDir, sentencesPerSample, samplesPerLanguage + samplesToSkip);

    // Diamond operators instead of raw types, so the element type is checked by the compiler.
    return new SampleSkipStream<>(new SampleShuffleStream<>(leipzigSamples), samplesToSkip);
  } catch (IOException e) {
    throw new TerminateToolException(-1, "IO error while opening sample data.", e);
  }
}
} // closes the enclosing class, whose header lies outside this block
/**
 * Returns the next sample, moving on to the next sentences file whenever the
 * current per-file stream is exhausted.
 *
 * @return the next {@link LanguageSample}, or {@code null} once the current
 *     stream yields nothing and no sentences file is left
 * @throws IOException if reading a sample or opening the next file fails
 */
public LanguageSample read() throws IOException {
  while (true) {
    // Serve from the currently open per-file stream first.
    if (sampleStream != null) {
      LanguageSample next = sampleStream.read();
      if (next != null) {
        return next;
      }
    }

    if (!sentencesFilesIt.hasNext()) {
      return null;
    }

    // The first three characters of the file name are used as the language code.
    File nextFile = sentencesFilesIt.next();
    String lang = nextFile.getName().substring(0, 3);
    sampleStream = new LeipzigSentencesStream(lang, nextFile, sentencesPerSample,
        langSampleCounts.get(lang));
    // Loop again to read from the freshly opened stream.
  }
}
/**
 * Reads the test data with one sentence per sample and two samples per
 * language, and checks that exactly four samples are produced.
 *
 * @throws IOException if the stream cannot be created or read; declared rather
 *     than caught so the failure report keeps the original exception and stack trace
 */
@Test
public void testReadSentenceFiles() throws IOException {
  int samplesPerLanguage = 2;
  int sentencesPerSample = 1;

  LeipzigLanguageSampleStream stream = new LeipzigLanguageSampleStream(
      new File(testDataPath), sentencesPerSample, samplesPerLanguage);

  int count = 0;
  while (stream.read() != null) {
    count++;
  }

  Assert.assertEquals(4, count);
}
/**
 * Resets the underlying stream and then runs {@code skipSamples()} again, the
 * same call the constructor makes after storing its arguments.
 *
 * @throws IOException if resetting or skipping fails
 * @throws UnsupportedOperationException declared by this method; presumably
 *     raised when the underlying stream cannot be reset
 */
@Override
public void reset() throws IOException, UnsupportedOperationException {
  // Rewind the underlying stream first, then re-apply the skip.
  samples.reset();
  skipSamples();
}
/**
 * Registers a {@link LeipzigLanguageSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@link LanguageSample} under the name
 * {@code "leipzig"}.
 */
public static void registerFactory() {
  LeipzigLanguageSampleStreamFactory leipzigFactory =
      new LeipzigLanguageSampleStreamFactory(Parameters.class);

  StreamFactoryRegistry.registerFactory(LanguageSample.class, "leipzig", leipzigFactory);
}
/**
 * Reads {@code samples} until it returns {@code null}, buffers every sample,
 * shuffles the buffer with a fixed seed and then calls {@code reset()}.
 *
 * @param samples the stream whose content is buffered
 * @throws IOException if reading from {@code samples} or {@code reset()} fails
 */
SampleShuffleStream(ObjectStream<T> samples) throws IOException {
  for (T next = samples.read(); next != null; next = samples.read()) {
    bufferedSamples.add(next);
  }

  // Fixed seed: the same input always yields the same shuffled order.
  Random shuffleRandom = new Random(23);
  Collections.shuffle(bufferedSamples, shuffleRandom);

  reset();
}
/**
 * Builds the sample stream described by the command line arguments.
 *
 * <p>A {@link LeipzigLanguageSampleStream} is created over the sentences
 * directory, wrapped in a {@code SampleShuffleStream} and then in a
 * {@code SampleSkipStream}. The per-language sample count requested from the
 * Leipzig stream is {@code samplesPerLanguage + samplesToSkip}.
 *
 * @param args the raw command line arguments, parsed into {@code Parameters}
 * @return the skip stream wrapping the shuffled Leipzig samples
 * @throws NumberFormatException if one of the numeric arguments is not a valid integer
 * @throws TerminateToolException if an {@link IOException} occurs while building the streams
 */
public ObjectStream<LanguageSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  File sentencesFileDir = params.getSentencesDir();

  // Parse every numeric argument exactly once; samplesToSkip is needed twice below.
  int sentencesPerSample = Integer.parseInt(params.getSentencesPerSample());
  int samplesPerLanguage = Integer.parseInt(params.getSamplesPerLanguage());
  int samplesToSkip = Integer.parseInt(params.getSamplesToSkip());

  try {
    LeipzigLanguageSampleStream leipzigSamples = new LeipzigLanguageSampleStream(
        sentencesFileDir, sentencesPerSample, samplesPerLanguage + samplesToSkip);

    // Diamond operators instead of raw types, so the element type is checked by the compiler.
    return new SampleSkipStream<>(new SampleShuffleStream<>(leipzigSamples), samplesToSkip);
  } catch (IOException e) {
    throw new TerminateToolException(-1, "IO error while opening sample data.", e);
  }
}
} // closes the enclosing class, whose header lies outside this block
/**
 * Returns the next sample, moving on to the next sentences file whenever the
 * current per-file stream is exhausted.
 *
 * @return the next {@link LanguageSample}, or {@code null} once the current
 *     stream yields nothing and no sentences file is left
 * @throws IOException if reading a sample or opening the next file fails
 */
public LanguageSample read() throws IOException {
  while (true) {
    // Serve from the currently open per-file stream first.
    if (sampleStream != null) {
      LanguageSample next = sampleStream.read();
      if (next != null) {
        return next;
      }
    }

    if (!sentencesFilesIt.hasNext()) {
      return null;
    }

    // The first three characters of the file name are used as the language code.
    File nextFile = sentencesFilesIt.next();
    String lang = nextFile.getName().substring(0, 3);
    sampleStream = new LeipzigSentencesStream(lang, nextFile, sentencesPerSample,
        langSampleCounts.get(lang));
    // Loop again to read from the freshly opened stream.
  }
}
/**
 * Creates a stream over {@code samples} and runs the initial skip.
 *
 * @param samples the underlying stream that is stored in this instance
 * @param samplesToSkip the skip count that is stored in this instance
 * @throws IOException if {@code skipSamples()} fails
 */
SampleSkipStream(ObjectStream<T> samples, int samplesToSkip) throws IOException {
  this.samplesToSkip = samplesToSkip;
  this.samples = samples;

  // Both fields are assigned before this call. skipSamples() is defined
  // elsewhere; presumably it discards the first samplesToSkip samples.
  skipSamples();
}
/**
 * Registers a {@link LeipzigLanguageSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@link LanguageSample} under the name
 * {@code "leipzig"}.
 */
public static void registerFactory() {
  LeipzigLanguageSampleStreamFactory leipzigFactory =
      new LeipzigLanguageSampleStreamFactory(Parameters.class);

  StreamFactoryRegistry.registerFactory(LanguageSample.class, "leipzig", leipzigFactory);
}
/**
 * Creates a stream over {@code samples} and runs the initial skip.
 *
 * @param samples the underlying stream that is stored in this instance
 * @param samplesToSkip the skip count that is stored in this instance
 * @throws IOException if {@code skipSamples()} fails
 */
SampleSkipStream(ObjectStream<T> samples, int samplesToSkip) throws IOException {
  this.samplesToSkip = samplesToSkip;
  this.samples = samples;

  // Both fields are assigned before this call. skipSamples() is defined
  // elsewhere; presumably it discards the first samplesToSkip samples.
  skipSamples();
}
/**
 * Resets the underlying stream and then runs {@code skipSamples()} again, the
 * same call the constructor makes after storing its arguments.
 *
 * @throws IOException if resetting or skipping fails
 * @throws UnsupportedOperationException declared by this method; presumably
 *     raised when the underlying stream cannot be reset
 */
@Override
public void reset() throws IOException, UnsupportedOperationException {
  // Rewind the underlying stream first, then re-apply the skip.
  samples.reset();
  skipSamples();
}
/**
 * Resets the underlying stream and then runs {@code skipSamples()} again, the
 * same call the constructor makes after storing its arguments.
 *
 * @throws IOException if resetting or skipping fails
 * @throws UnsupportedOperationException declared by this method; presumably
 *     raised when the underlying stream cannot be reset
 */
@Override
public void reset() throws IOException, UnsupportedOperationException {
  // Rewind the underlying stream first, then re-apply the skip.
  samples.reset();
  skipSamples();
}