/**
 * Loads all lines of a classpath resource without any comment filtering.
 * Convenience overload that delegates to the two-argument variant with a
 * {@code null} comment prefix (meaning: keep every line).
 *
 * @param resourcePath classpath location of the resource to read.
 * @return all lines of the resource.
 * @throws IOException if the resource cannot be located or read.
 */
public static List<String> loadLinesFromResource(String resourcePath) throws IOException {
  return loadLinesFromResource(resourcePath, null);
}
/**
 * Builds a {@code RootLexicon} from several classpath dictionary resources.
 * Lines beginning with {@code "##"} are treated as comments and dropped by the
 * loader; remaining lines from all resources are concatenated and parsed together.
 *
 * @param resourcePaths classpath locations of dictionary files.
 * @return lexicon built from the combined dictionary lines.
 * @throws IOException if any resource cannot be read.
 */
public static RootLexicon loadFromResources(Collection<String> resourcePaths) throws IOException {
  List<String> combined = Lists.newArrayList();
  for (String path : resourcePaths) {
    combined.addAll(TextIO.loadLinesFromResource(path, "##"));
  }
  return load(combined);
}
/**
 * Reads pair rules from a classpath resource. Lines starting with {@code "#"}
 * are skipped as comments; lines that {@code PairRule.fromLine} rejects
 * (returns null for) are silently ignored.
 *
 * @param resource classpath location of the rule file.
 * @return the successfully parsed rules, in file order.
 * @throws IOException if the resource cannot be read.
 */
static List<PairRule> loadPairRule(String resource) throws IOException {
  List<PairRule> result = new ArrayList<>();
  for (String line : TextIO.loadLinesFromResource(resource, "#")) {
    PairRule parsed = PairRule.fromLine(line);
    if (parsed != null) {
      result.add(parsed);
    }
  }
  return result;
}
/**
 * Loads a {@code Weights} instance from a classpath resource by reading all
 * lines and handing them to {@code loadFromLines}.
 *
 * @param resource classpath location of the weights file.
 * @return weights parsed from the resource content.
 * @throws IOException if the resource cannot be read.
 */
public static Weights loadFromResource(String resource) throws IOException {
  return loadFromLines(TextIO.loadLinesFromResource(resource));
}
/**
 * Cross-checks the proper-noun dictionary against the abbreviation dictionary.
 * Entries present in both (per the normalization applied by {@code putToMap})
 * are logged and removed from the proper-noun set; the surviving proper-noun
 * values are sorted with the Turkish collator and written to the file
 * {@code zemberek.prop.sorted} in the working directory.
 *
 * @throws IOException if a dictionary resource cannot be read or the output
 *     file cannot be written.
 */
public static void checkAbbreviations() throws IOException {
  LinkedHashSet<String> properWords =
      new LinkedHashSet<>(TextIO.loadLinesFromResource("tr/proper-from-corpus.dict"));
  LinkedHashSet<String> abbreviationWords =
      new LinkedHashSet<>(TextIO.loadLinesFromResource("tr/abbreviations.dict"));

  Map<String, String> properMap = new HashMap<>();
  putToMap(properWords, properMap);
  Map<String, String> abbreviationMap = new HashMap<>();
  putToMap(abbreviationWords, abbreviationMap);

  // Removal targets properMap while iterating abbreviationMap's keys,
  // so there is no concurrent-modification hazard here.
  for (String key : abbreviationMap.keySet()) {
    if (properMap.containsKey(key)) {
      Log.info(key);
      properMap.remove(key);
    }
  }

  List<String> remaining = new ArrayList<>(properMap.values());
  remaining.sort(Turkish.STRING_COMPARATOR_ASC);
  Files.write(Paths.get("zemberek.prop.sorted"), remaining);
}
/**
 * Adds dictionary entries loaded from the given classpath resources to the
 * lexicon being built.
 *
 * @param resources classpath locations of text dictionary files.
 * @return this builder, for chaining.
 * @throws IOException if any resource cannot be read.
 */
public Builder addTextDictionaryResources(Collection<String> resources) throws IOException {
  Log.info("Dictionaries :%s", String.join(", ", resources));
  List<String> collected = new ArrayList<>();
  for (String res : resources) {
    collected.addAll(TextIO.loadLinesFromResource(res));
  }
  lexicon.addAll(TurkishDictionaryLoader.load(collected));
  return this;
}
/**
 * Loads a named NER model from classpath resources under
 * {@code /ner/model/<name>}. The {@code ner-types} file lists the entity
 * types; one {@code <type>.ner.model} weights file is loaded per type.
 *
 * @param name model directory name under {@code /ner/model/}.
 * @param morphology morphology instance used by the resulting NER model.
 * @return the assembled {@code PerceptronNer}.
 * @throws RuntimeException wrapping the {@code IOException} if any resource
 *     cannot be read.
 */
public static PerceptronNer loadModelFromResources(String name, TurkishMorphology morphology) {
  String root = "/ner/model/" + name;
  try {
    Map<String, ClassModel> weightsById = new HashMap<>();
    for (String type : TextIO.loadLinesFromResource(root + "/ner-types")) {
      ClassModel model = ClassModel.loadFromResource(type, root + "/" + type + ".ner.model");
      weightsById.put(model.id, model);
    }
    return new PerceptronNer(weightsById, morphology);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Creates a {@code ClassModel} with the given id from a weights file located
 * on the classpath.
 *
 * @param id identifier assigned to the resulting model.
 * @param resourcePath classpath location of the weights file.
 * @return the constructed model.
 * @throws IOException if the resource cannot be read.
 */
public static ClassModel loadFromResource(String id, String resourcePath) throws IOException {
  return new ClassModel(id, Weights.loadFromLines(TextIO.loadLinesFromResource(resourcePath)));
}
}
/**
 * Asynchronously warms the static analysis cache with the most frequently
 * used words. A background thread reads {@code MOST_USED_WORDS_FILE} and
 * analyzes up to {@code STATIC_CACHE_CAPACITY} words via the provided
 * analysis function. No-op if the static cache is disabled or was already
 * initialized.
 *
 * <p>Note: {@code staticCacheInitialized} is set immediately after the thread
 * is started (not when it finishes), which prevents a second warm-up from
 * being launched while the first is still filling the cache.
 *
 * @param analysisProvider function producing the analysis to cache per word.
 */
public synchronized void initializeStaticCache(Function<String, WordAnalysis> analysisProvider) {
  if (staticCacheDisabled || staticCacheInitialized) {
    return;
  }
  new Thread(() -> {
    try {
      Stopwatch stopwatch = Stopwatch.createStarted();
      List<String> words = TextIO.loadLinesFromResource(MOST_USED_WORDS_FILE);
      Log.debug("File read in %d ms.", stopwatch.elapsed(TimeUnit.MILLISECONDS));
      int size = Math.min(STATIC_CACHE_CAPACITY, words.size());
      for (int i = 0; i < size; i++) {
        String word = words.get(i);
        staticCache.put(word, analysisProvider.apply(word));
      }
      Log.debug("Static cache initialized with %d most frequent words", size);
      Log.debug("Initialization time: %d ms.", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    } catch (IOException e) {
      // Fix: previously the cause was dumped with e.printStackTrace(), bypassing
      // the project logger. Route the failure reason through Log instead.
      Log.error("Could not read most frequent words list, static cache is disabled. Reason: %s",
          e.getMessage());
    }
  }).start();
  staticCacheInitialized = true;
}
// NOTE(review): fragment of a larger constructor/initializer — the enclosing method
// header and the body of the final for-loop are outside this view, so the code is
// left byte-identical. What is visible: loads an ascii-deasciification lookup from
// the data root, a manually curated candidate lookup and question-suffix /
// no-split word lists from classpath resources, appends the hard-coded connected
// suffixes "de", "da", "ki", then begins iterating multi-word replacement lines
// (loop body not visible here).
this.lookupFromAscii = loadMultiMap(dataRoot.resolve("ascii-map")); List<String> manualLookup = TextIO.loadLinesFromResource("normalization/candidates-manual"); this.lookupManual = loadMultiMap(manualLookup); this.commonConnectedSuffixes.addAll(TextIO.loadLinesFromResource( "normalization/question-suffixes")); this.commonConnectedSuffixes.addAll(Arrays.asList("de", "da", "ki")); this.noSplitWords.addAll(TextIO.loadLinesFromResource( "normalization/no-split")); List<String> replaceLines = TextIO.loadLinesFromResource( "normalization/multi-word-replacements"); for (String replaceLine : replaceLines) {
/**
 * Builds the combined stem/ending graph. The ending graph is generated from
 * the {@code endings} classpath resource, the stem graph from the morphology's
 * lexicon; every stem node carrying a word is then connected to the ending
 * graph's root with an epsilon transition.
 *
 * @param morphology morphology used to generate the stem graph.
 * @throws IOException if the endings resource cannot be read.
 */
public StemEndingGraph(TurkishMorphology morphology) throws IOException {
  this.morphology = morphology;
  this.endingGraph = generateEndingGraph(TextIO.loadLinesFromResource("endings"));
  this.stemGraph = generateStemGraph();
  for (Node wordNode : stemGraph.getAllNodes(n -> n.word != null)) {
    wordNode.connectEpsilon(endingGraph.getRoot());
  }
}