/**
 * Loads all lines of a classpath resource without any comment filtering.
 * Convenience overload that delegates to the two-argument variant with a
 * {@code null} comment prefix (meaning: keep every line).
 *
 * @param resourcePath classpath location of the resource to read.
 * @return all lines of the resource.
 * @throws IOException if the resource cannot be located or read.
 */
public static List<String> loadLinesFromResource(String resourcePath) throws IOException {
  return loadLinesFromResource(resourcePath, null);
}
/**
 * Builds a {@code RootLexicon} from several classpath dictionary resources.
 * Lines beginning with {@code "##"} are treated as comments and dropped by the
 * loader; remaining lines from all resources are concatenated and parsed together.
 *
 * @param resourcePaths classpath locations of dictionary files.
 * @return lexicon built from the combined dictionary lines.
 * @throws IOException if any resource cannot be read.
 */
public static RootLexicon loadFromResources(Collection<String> resourcePaths) throws IOException {
  List<String> combined = Lists.newArrayList();
  for (String path : resourcePaths) {
    combined.addAll(TextIO.loadLinesFromResource(path, "##"));
  }
  return load(combined);
}
/**
 * Reads pair rules from a classpath resource. Lines starting with {@code "#"}
 * are skipped as comments; lines that {@code PairRule.fromLine} rejects
 * (returns null for) are silently ignored.
 *
 * @param resource classpath location of the rule file.
 * @return the successfully parsed rules, in file order.
 * @throws IOException if the resource cannot be read.
 */
static List<PairRule> loadPairRule(String resource) throws IOException {
  List<PairRule> result = new ArrayList<>();
  for (String line : TextIO.loadLinesFromResource(resource, "#")) {
    PairRule parsed = PairRule.fromLine(line);
    if (parsed != null) {
      result.add(parsed);
    }
  }
  return result;
}
/**
 * Loads a {@code Weights} instance from a classpath resource by reading all
 * lines and handing them to {@code loadFromLines}.
 *
 * @param resource classpath location of the weights file.
 * @return weights parsed from the resource content.
 * @throws IOException if the resource cannot be read.
 */
public static Weights loadFromResource(String resource) throws IOException {
  return loadFromLines(TextIO.loadLinesFromResource(resource));
}
/**
 * Cross-checks the proper-noun dictionary against the abbreviation dictionary.
 * Entries present in both (per the normalization applied by {@code putToMap})
 * are logged and removed from the proper-noun set; the surviving proper-noun
 * values are sorted with the Turkish collator and written to the file
 * {@code zemberek.prop.sorted} in the working directory.
 *
 * @throws IOException if a dictionary resource cannot be read or the output
 *     file cannot be written.
 */
public static void checkAbbreviations() throws IOException {
  LinkedHashSet<String> properWords =
      new LinkedHashSet<>(TextIO.loadLinesFromResource("tr/proper-from-corpus.dict"));
  LinkedHashSet<String> abbreviationWords =
      new LinkedHashSet<>(TextIO.loadLinesFromResource("tr/abbreviations.dict"));

  Map<String, String> properMap = new HashMap<>();
  putToMap(properWords, properMap);
  Map<String, String> abbreviationMap = new HashMap<>();
  putToMap(abbreviationWords, abbreviationMap);

  // Removal targets properMap while iterating abbreviationMap's keys,
  // so there is no concurrent-modification hazard here.
  for (String key : abbreviationMap.keySet()) {
    if (properMap.containsKey(key)) {
      Log.info(key);
      properMap.remove(key);
    }
  }

  List<String> remaining = new ArrayList<>(properMap.values());
  remaining.sort(Turkish.STRING_COMPARATOR_ASC);
  Files.write(Paths.get("zemberek.prop.sorted"), remaining);
}
/**
 * Adds dictionary entries loaded from the given classpath resources to the
 * lexicon being built.
 *
 * @param resources classpath locations of text dictionary files.
 * @return this builder, for chaining.
 * @throws IOException if any resource cannot be read.
 */
public Builder addTextDictionaryResources(Collection<String> resources) throws IOException {
  Log.info("Dictionaries :%s", String.join(", ", resources));
  List<String> collected = new ArrayList<>();
  for (String res : resources) {
    collected.addAll(TextIO.loadLinesFromResource(res));
  }
  lexicon.addAll(TurkishDictionaryLoader.load(collected));
  return this;
}
/**
 * Loads a named NER model from classpath resources under
 * {@code /ner/model/<name>}. The {@code ner-types} file lists the entity
 * types; one {@code <type>.ner.model} weights file is loaded per type.
 *
 * @param name model directory name under {@code /ner/model/}.
 * @param morphology morphology instance used by the resulting NER model.
 * @return the assembled {@code PerceptronNer}.
 * @throws RuntimeException wrapping the {@code IOException} if any resource
 *     cannot be read.
 */
public static PerceptronNer loadModelFromResources(String name, TurkishMorphology morphology) {
  String root = "/ner/model/" + name;
  try {
    Map<String, ClassModel> weightsById = new HashMap<>();
    for (String type : TextIO.loadLinesFromResource(root + "/ner-types")) {
      ClassModel model = ClassModel.loadFromResource(type, root + "/" + type + ".ner.model");
      weightsById.put(model.id, model);
    }
    return new PerceptronNer(weightsById, morphology);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Creates a {@code ClassModel} with the given id from a weights file located
 * on the classpath.
 *
 * @param id identifier assigned to the resulting model.
 * @param resourcePath classpath location of the weights file.
 * @return the constructed model.
 * @throws IOException if the resource cannot be read.
 */
public static ClassModel loadFromResource(String id, String resourcePath) throws IOException {
  return new ClassModel(id, Weights.loadFromLines(TextIO.loadLinesFromResource(resourcePath)));
}
}
/**
 * Asynchronously warms the static analysis cache with the most frequently
 * used words. A background thread reads {@code MOST_USED_WORDS_FILE} and
 * analyzes up to {@code STATIC_CACHE_CAPACITY} words via the provided
 * analysis function. No-op if the static cache is disabled or was already
 * initialized.
 *
 * <p>Note: {@code staticCacheInitialized} is set immediately after the thread
 * is started (not when it finishes), which prevents a second warm-up from
 * being launched while the first is still filling the cache.
 *
 * @param analysisProvider function producing the analysis to cache per word.
 */
public synchronized void initializeStaticCache(Function<String, WordAnalysis> analysisProvider) {
  if (staticCacheDisabled || staticCacheInitialized) {
    return;
  }
  new Thread(() -> {
    try {
      Stopwatch stopwatch = Stopwatch.createStarted();
      List<String> words = TextIO.loadLinesFromResource(MOST_USED_WORDS_FILE);
      Log.debug("File read in %d ms.", stopwatch.elapsed(TimeUnit.MILLISECONDS));
      int size = Math.min(STATIC_CACHE_CAPACITY, words.size());
      for (int i = 0; i < size; i++) {
        String word = words.get(i);
        staticCache.put(word, analysisProvider.apply(word));
      }
      Log.debug("Static cache initialized with %d most frequent words", size);
      Log.debug("Initialization time: %d ms.", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    } catch (IOException e) {
      // Fix: previously the cause was dumped with e.printStackTrace(), bypassing
      // the project logger. Route the failure reason through Log instead.
      Log.error("Could not read most frequent words list, static cache is disabled. Reason: %s",
          e.getMessage());
    }
  }).start();
  staticCacheInitialized = true;
}
// NOTE(review): fragment of a larger constructor/initializer — the enclosing method
// header and the body of the final for-loop are outside this view, so the code is
// left byte-identical. What is visible: loads an ascii-deasciification lookup from
// the data root, a manually curated candidate lookup and question-suffix /
// no-split word lists from classpath resources, appends the hard-coded connected
// suffixes "de", "da", "ki", then begins iterating multi-word replacement lines
// (loop body not visible here).
this.lookupFromAscii = loadMultiMap(dataRoot.resolve("ascii-map")); List<String> manualLookup = TextIO.loadLinesFromResource("normalization/candidates-manual"); this.lookupManual = loadMultiMap(manualLookup); this.commonConnectedSuffixes.addAll(TextIO.loadLinesFromResource( "normalization/question-suffixes")); this.commonConnectedSuffixes.addAll(Arrays.asList("de", "da", "ki")); this.noSplitWords.addAll(TextIO.loadLinesFromResource( "normalization/no-split")); List<String> replaceLines = TextIO.loadLinesFromResource( "normalization/multi-word-replacements"); for (String replaceLine : replaceLines) {
/**
 * Builds the combined stem/ending graph. The ending graph is generated from
 * the {@code endings} classpath resource, the stem graph from the morphology's
 * lexicon; every stem node carrying a word is then connected to the ending
 * graph's root with an epsilon transition.
 *
 * @param morphology morphology used to generate the stem graph.
 * @throws IOException if the endings resource cannot be read.
 */
public StemEndingGraph(TurkishMorphology morphology) throws IOException {
  this.morphology = morphology;
  this.endingGraph = generateEndingGraph(TextIO.loadLinesFromResource("endings"));
  this.stemGraph = generateStemGraph();
  for (Node wordNode : stemGraph.getAllNodes(n -> n.word != null)) {
    wordNode.connectEpsilon(endingGraph.getRoot());
  }
}