/**
 * Creates a sample stream over the sentence files located directly inside a folder.
 *
 * <p>Only regular, non-hidden files whose names begin with three lowercase ASCII
 * letters are used. That three-letter prefix is the key under which files are
 * grouped (presumably a language code; confirm against the corpus naming scheme).
 * The selected files are sorted so that processing order does not depend on the
 * order in which the file system happens to list them.
 *
 * <p>For every prefix a per-file quota is computed as {@code samplesPerLanguage}
 * divided by the number of files sharing that prefix. This is an integer division,
 * so any remainder is dropped.
 *
 * @param leipzigFolder the folder whose files are scanned
 * @param sentencesPerSample value stored in the {@code sentencesPerSample} field
 * @param samplesPerLanguage the budget that is split evenly among the files sharing
 *     one prefix
 * @throws IOException if {@code leipzigFolder} cannot be listed (it is not a
 *     directory or an I/O error occurred), or if thrown by {@code reset()}
 */
public LeipzigLanguageSampleStream(File leipzigFolder, final int sentencesPerSample,
    final int samplesPerLanguage) throws IOException {
  this.sentencesPerSample = sentencesPerSample;

  // The cast selects the FileFilter overload explicitly.
  File[] candidates = leipzigFolder.listFiles((FileFilter) pathname ->
      !pathname.isHidden()
          && pathname.isFile()
          && pathname.getName().length() >= 3
          && pathname.getName().substring(0, 3).matches("[a-z]+"));

  // listFiles signals "not a directory" and I/O failures by returning null; report
  // that as an IOException instead of failing with a NullPointerException in sort.
  if (candidates == null) {
    throw new IOException("Unable to list files in " + leipzigFolder
        + ": not a directory or an I/O error occurred");
  }

  Arrays.sort(candidates);
  sentencesFiles = candidates;

  // Number of files per three-letter prefix.
  Map<String, Integer> langCounts = Arrays.stream(sentencesFiles)
      .map(file -> file.getName().substring(0, 3))
      .collect(Collectors.groupingBy(String::toString, Collectors.summingInt(v -> 1)));

  // Each count is at least 1, so the division below cannot divide by zero.
  langSampleCounts = langCounts.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, e -> samplesPerLanguage / e.getValue()));

  // Fixed seed: the generated pseudo-random sequence is the same on every run.
  random = new Random(23);

  reset();
}
/**
 * Creates a sample stream over the sentence files located directly inside a folder.
 *
 * <p>Only regular, non-hidden files whose names begin with three lowercase ASCII
 * letters are used. That three-letter prefix is the key under which files are
 * grouped (presumably a language code; confirm against the corpus naming scheme).
 * The selected files are sorted so that processing order does not depend on the
 * order in which the file system happens to list them.
 *
 * <p>For every prefix a per-file quota is computed as {@code samplesPerLanguage}
 * divided by the number of files sharing that prefix. This is an integer division,
 * so any remainder is dropped.
 *
 * @param leipzigFolder the folder whose files are scanned
 * @param sentencesPerSample value stored in the {@code sentencesPerSample} field
 * @param samplesPerLanguage the budget that is split evenly among the files sharing
 *     one prefix
 * @throws IOException if {@code leipzigFolder} cannot be listed (it is not a
 *     directory or an I/O error occurred), or if thrown by {@code reset()}
 */
public LeipzigLanguageSampleStream(File leipzigFolder, final int sentencesPerSample,
    final int samplesPerLanguage) throws IOException {
  this.sentencesPerSample = sentencesPerSample;

  // The cast selects the FileFilter overload explicitly.
  File[] candidates = leipzigFolder.listFiles((FileFilter) pathname ->
      !pathname.isHidden()
          && pathname.isFile()
          && pathname.getName().length() >= 3
          && pathname.getName().substring(0, 3).matches("[a-z]+"));

  // listFiles signals "not a directory" and I/O failures by returning null; report
  // that as an IOException instead of failing with a NullPointerException in sort.
  if (candidates == null) {
    throw new IOException("Unable to list files in " + leipzigFolder
        + ": not a directory or an I/O error occurred");
  }

  Arrays.sort(candidates);
  sentencesFiles = candidates;

  // Number of files per three-letter prefix.
  Map<String, Integer> langCounts = Arrays.stream(sentencesFiles)
      .map(file -> file.getName().substring(0, 3))
      .collect(Collectors.groupingBy(String::toString, Collectors.summingInt(v -> 1)));

  // Each count is at least 1, so the division below cannot divide by zero.
  langSampleCounts = langCounts.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, e -> samplesPerLanguage / e.getValue()));

  // Fixed seed: the generated pseudo-random sequence is the same on every run.
  random = new Random(23);

  reset();
}
/**
 * Creates a sample stream over the sentence files located directly inside a folder.
 *
 * <p>Only regular, non-hidden files whose names begin with three lowercase ASCII
 * letters are used. That three-letter prefix is the key under which files are
 * grouped (presumably a language code; confirm against the corpus naming scheme).
 * The selected files are sorted so that processing order does not depend on the
 * order in which the file system happens to list them.
 *
 * <p>For every prefix a per-file quota is computed as {@code samplesPerLanguage}
 * divided by the number of files sharing that prefix. This is an integer division,
 * so any remainder is dropped.
 *
 * @param leipzigFolder the folder whose files are scanned
 * @param sentencesPerSample value stored in the {@code sentencesPerSample} field
 * @param samplesPerLanguage the budget that is split evenly among the files sharing
 *     one prefix
 * @throws IOException if {@code leipzigFolder} cannot be listed (it is not a
 *     directory or an I/O error occurred), or if thrown by {@code reset()}
 */
public LeipzigLanguageSampleStream(File leipzigFolder, final int sentencesPerSample,
    final int samplesPerLanguage) throws IOException {
  this.sentencesPerSample = sentencesPerSample;

  // The cast selects the FileFilter overload explicitly.
  File[] candidates = leipzigFolder.listFiles((FileFilter) pathname ->
      !pathname.isHidden()
          && pathname.isFile()
          && pathname.getName().length() >= 3
          && pathname.getName().substring(0, 3).matches("[a-z]+"));

  // listFiles signals "not a directory" and I/O failures by returning null; report
  // that as an IOException instead of failing with a NullPointerException in sort.
  if (candidates == null) {
    throw new IOException("Unable to list files in " + leipzigFolder
        + ": not a directory or an I/O error occurred");
  }

  Arrays.sort(candidates);
  sentencesFiles = candidates;

  // Number of files per three-letter prefix.
  Map<String, Integer> langCounts = Arrays.stream(sentencesFiles)
      .map(file -> file.getName().substring(0, 3))
      .collect(Collectors.groupingBy(String::toString, Collectors.summingInt(v -> 1)));

  // Each count is at least 1, so the division below cannot divide by zero.
  langSampleCounts = langCounts.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, e -> samplesPerLanguage / e.getValue()));

  // Fixed seed: the generated pseudo-random sequence is the same on every run.
  random = new Random(23);

  reset();
}