zemberek.morphology.TurkishMorphology.createWithDefaults java code examples

/**
 * Creates a context bound to the given gRPC configuration.
 *
 * <p>The tokenizer is set to {@code TurkishTokenizer.ALL} and the morphology is built with
 * {@code TurkishMorphology.createWithDefaults()}.
 *
 * @param configuration configuration stored on this context as-is (no validation is done here)
 */
public ZemberekContext(ZemberekGrpcConfiguration configuration) {
 this.tokenizer = TurkishTokenizer.ALL;
 this.morphology = TurkishMorphology.createWithDefaults();
 this.configuration = configuration;
}

/**
 * Creates a context without assigning a configuration.
 *
 * <p>Only the tokenizer ({@code TurkishTokenizer.ALL}) and the default morphology are set;
 * the {@code configuration} field is not touched by this constructor.
 */
public ZemberekContext() {
 this.tokenizer = TurkishTokenizer.ALL;
 this.morphology = TurkishMorphology.createWithDefaults();
}

 /** Initializes the {@code morphology} field with the library's default morphology. */
 Singleton() {
  this.morphology = TurkishMorphology.createWithDefaults();
 }
}

/**
 * Creates the statistics helper and initializes its {@code parser} field with the
 * default morphology.
 *
 * @throws IOException declared by the original signature
 */
public AmbiguityStats() throws IOException {
 this.parser = TurkishMorphology.createWithDefaults();
}

/**
 * Rewrites every input line and stores the results in a file, one line per entry.
 *
 * <p>Each line goes through {@code replaceWordsWithLemma}, then {@code removeNonWords}, and is
 * finally lower-cased using {@code Turkish.LOCALE}. The {@code morphology} field is reassigned
 * to a fresh default instance before any line is processed.
 *
 * @param lines input lines
 * @param lemmasPath destination file for the processed lines
 * @throws IOException if writing the destination file fails
 */
private void generateSetWithLemmas(List<String> lines, Path lemmasPath) throws IOException {
 morphology = TurkishMorphology.createWithDefaults();
 // The three per-line steps are applied in the same order as before, in one mapping stage.
 List<String> lemmas = lines.stream()
   .map(line -> removeNonWords(replaceWordsWithLemma(line)).toLowerCase(Turkish.LOCALE))
   .collect(Collectors.toList());
 Files.write(lemmasPath, lemmas);
}

/**
 * Reads the gold test sentences, cleans them with {@code clean}, and hands them to
 * {@code saveUnambiguous} together with a default morphology and the output path.
 *
 * @throws IOException if the input file cannot be read (or is propagated from the callee)
 */
static void saveUnambigious() throws IOException {
 Path input = Paths.get("data/gold/gold-test.sentences");
 Path output = Paths.get("data/gold/gold-test.txt");
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 saveUnambiguous(
   clean(Files.readAllLines(input)),
   morphology,
   output);
}

/**
 * Collects the distinct lemmas of the default lexicon and writes them, sorted with
 * {@code Turkish.STRING_COMPARATOR_ASC}, to the file {@code zemberek.vocab}.
 *
 * <p>An item is left out when it carries {@code RootAttribute.Dummy}, when its lemma is shorter
 * than {@code minLength}, or when its primary POS is {@code PrimaryPos.Punctuation}.
 *
 * @param minLength minimum lemma length (in {@code String.length()} units) to keep
 * @throws IOException if the vocabulary file cannot be written
 */
public static void saveLemmas(int minLength) throws IOException {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 Set<String> uniqueLemmas = new HashSet<>();
 for (DictionaryItem entry : morphology.getLexicon()) {
  // Checks are evaluated in the same order as the original guard clauses.
  boolean excluded = entry.attributes.contains(RootAttribute.Dummy)
    || entry.lemma.length() < minLength
    || entry.primaryPos == PrimaryPos.Punctuation;
  if (!excluded) {
   uniqueLemmas.add(entry.lemma);
  }
 }
 List<String> sorted = new ArrayList<>(uniqueLemmas);
 sorted.sort(Turkish.STRING_COMPARATOR_ASC);
 Files.write(Paths.get("zemberek.vocab"), sorted);
}

/**
 * Copies the words of a vocabulary file that the default morphology reports as correct.
 *
 * <p>Words whose analysis is not correct are logged with a warning and skipped. Both files are
 * read and written as UTF-8.
 *
 * @param vocabFile input file, one word per line
 * @param outFile output file receiving the accepted words in input order
 * @throws IOException if reading or writing fails
 */
private static void filterVocab(Path vocabFile, Path outFile) throws IOException {
 List<String> candidates = Files.readAllLines(vocabFile, StandardCharsets.UTF_8);
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 List<String> accepted = new ArrayList<>();
 for (String candidate : candidates) {
  WordAnalysis analysis = morphology.analyze(candidate);
  if (analysis.isCorrect()) {
   accepted.add(candidate);
  } else {
   Log.warn("Cannot analyze %s", candidate);
  }
 }
 Files.write(outFile, accepted, StandardCharsets.UTF_8);
}

/**
 * Loads a NER model from the {@code my-model} directory and prints the named entities found
 * in a sample sentence to standard output.
 *
 * @param args unused
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
 // assumes you generated a model in my-model directory.
 Path modelDir = Paths.get("my-model");
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 PerceptronNer ner = PerceptronNer.loadModel(modelDir, morphology);
 String input = "Ali Kaan yarın İstanbul'a gidecek.";
 NerSentence nerSentence = ner.findNamedEntities(input);
 // One entity per output line.
 for (NamedEntity entity : nerSentence.getNamedEntities()) {
  System.out.println(entity);
 }
}

/**
 * Builds an {@code AmbiguousExampleFinder} over the corpus index at a fixed local path and
 * passes it, together with a default morphology, to {@code extractSentences}.
 *
 * @param args unused
 * @throws Exception declared by the original signature
 */
public static void main(String[] args) throws Exception {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 // Hard-coded index location; adjust for the local environment.
 CorpusSearcher corpusSearcher =
   new CorpusSearcher(Paths.get("/home/aaa/data/zemberek/corpus-index"));
 AmbiguousExampleFinder exampleFinder = new AmbiguousExampleFinder(corpusSearcher);
 extractSentences(morphology, exampleFinder);
}

/**
 * Creates a {@code DistanceBasedStemmer} from a binary vector file and a binary distance list.
 *
 * <p>The loaded vectors are indexed by their {@code word} field; if a word occurs more than
 * once, the later entry replaces the earlier one.
 *
 * @param vector binary word-vector file
 * @param distances binary distance file
 * @param vocabFile vocabulary file passed to {@code DistanceList.readFromBinary}
 * @return a stemmer built from the vector map, the distance list and a default morphology
 * @throws IOException declared by the original signature
 */
public static DistanceBasedStemmer load(Path vector, Path distances, Path vocabFile)
  throws IOException {
 Log.info("Loading vector file.");
 List<WordVector> vectors = WordVector.loadFromBinary(vector);
 Map<String, WordVector> byWord = new HashMap<>(vectors.size());
 vectors.forEach(v -> byWord.put(v.word, v));
 Log.info("Loading distances.");
 DistanceList distanceList = DistanceList.readFromBinary(distances, vocabFile);
 return new DistanceBasedStemmer(
   byWord, distanceList, TurkishMorphology.createWithDefaults());
}

/**
 * Trains a NER model from the {@code ner-train} and {@code ner-test} files and saves it as
 * text under the {@code my-model} directory.
 *
 * @param args unused
 * @throws IOException if creating the model directory fails (or is propagated from a callee)
 */
public static void main(String[] args) throws IOException {
 // you will need ner-train and ner-test files to run this example.
 Path trainPath = Paths.get("ner-train");
 Path testPath = Paths.get("ner-test");
 Path modelRoot = Paths.get("my-model");
 NerDataSet trainingSet = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
 Log.info(trainingSet.info()); // prints information
 NerDataSet testSet = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
 Log.info(testSet.info());
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 // Training occurs here. Result is a PerceptronNer instance.
 // NOTE(review): 13 and 0.1f are passed below; this comment previously said "7 iterations
 // with 0.1 learning rate". Presumably the third argument is the iteration count and the
 // fourth the learning rate — confirm which iteration count is intended.
 PerceptronNer ner = new PerceptronNerTrainer(morphology)
   .train(trainingSet, testSet, 13, 0.1f);
 // Make sure the target directory exists before the model is written into it.
 Files.createDirectories(modelRoot);
 ner.saveModelAsText(modelRoot);
}

/**
 * Analyzes and disambiguates a sample sentence, then logs each word's input form together
 * with the POS of its best analysis.
 *
 * @param args unused
 */
public static void main(String[] args) {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 String sentence = "Keşke yarın hava güzel olsa.";
 Log.info("Sentence  = " + sentence);
 SentenceAnalysis disambiguated = morphology.analyzeAndDisambiguate(sentence);
 for (SentenceWordAnalysis wordResult : disambiguated) {
  PrimaryPos pos = wordResult.getBestAnalysis().getPos();
  Log.info("%s : %s ", wordResult.getWordAnalysis().getInput(), pos);
 }
}

 /**
  * Logs every analysis of each word in a sample sentence, then disambiguates the sentence
  * and logs the best analysis per word.
  *
  * @param args unused
  */
 public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String sentence = "Bol baharatlı bir yemek yaptıralım.";
  Log.info("Sentence  = " + sentence);

  // Step 1: all candidate analyses, word by word.
  List<WordAnalysis> wordAnalyses = morphology.analyzeSentence(sentence);
  Log.info("Sentence word analysis result:");
  for (WordAnalysis wordAnalysis : wordAnalyses) {
   Log.info("Word = " + wordAnalysis.getInput());
   for (SingleAnalysis candidate : wordAnalysis) {
    Log.info(candidate.formatLong());
   }
  }

  // Step 2: pick the best analysis for each word.
  SentenceAnalysis disambiguated = morphology.disambiguate(sentence, wordAnalyses);
  Log.info("\nAfter ambiguity resolution : ");
  disambiguated.bestAnalysis().forEach(Log::info);
 }
}

/**
 * Trains a NER model, saves it as text, evaluates it on the test set and writes an
 * evaluation report.
 *
 * @param trainPath training data file (bracket annotation style)
 * @param testPath test data file (bracket annotation style)
 * @param modelRoot directory the model is saved into; created if missing
 * @param reportPath path handed to {@code PerceptronNerTrainer.evaluationReport}
 * @throws IOException if creating the model directory fails (or is propagated from a callee)
 */
public static void trainAndTest(
  Path trainPath,
  Path testPath,
  Path modelRoot,
  Path reportPath) throws IOException {
 NerDataSet trainData = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
 Log.info(trainData.info());
 NerDataSet testData = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
 Log.info(testData.info());

 PerceptronNerTrainer trainer =
   new PerceptronNerTrainer(TurkishMorphology.createWithDefaults());
 PerceptronNer model = trainer.train(trainData, testData, 7, 0.1f);

 Files.createDirectories(modelRoot);
 model.saveModelAsText(modelRoot);

 Log.info("Testing %d sentences.", testData.sentences.size());
 NerDataSet predictions = model.evaluate(testData);
 PerceptronNerTrainer.evaluationReport(testData, predictions, reportPath);
 Log.info("Done.");
}

/**
 * Analyzes a single sample word and logs, for each analysis, its long format followed by
 * its stems and lemmas.
 *
 * @param args unused
 */
public static void main(String[] args) {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 String input = "kutucuğumuz";
 Log.info("Word = " + input);
 Log.info("Results: ");
 WordAnalysis analyses = morphology.analyze(input);
 for (SingleAnalysis analysis : analyses) {
  Log.info(analysis.formatLong());
  Log.info("\tStems = " + analysis.getStems());
  Log.info("\tLemmas = " + analysis.getLemmas());
 }
}

 /**
  * Demonstrates the spell checker: first logs the check result for a few words, then logs
  * the suggestions for a few misspelled words.
  *
  * @param args unused
  * @throws IOException declared by the original signature
  */
 public static void main(String[] args) throws IOException {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  TurkishSpellChecker checker = new TurkishSpellChecker(morphology);

  Log.info("Check if written correctly.");
  String[] toCheck = {"Ankara'ya", "Ankar'aya", "yapbileceksen", "yapabileceğinizden"};
  for (String candidate : toCheck) {
   Log.info(candidate + " -> " + checker.check(candidate));
  }

  Log.info();
  Log.info("Give suggestions.");
  String[] misspelled = {"Kraamanda", "okumuştk", "yapbileceksen", "oukyamıyorum"};
  for (String candidate : misspelled) {
   Log.info(candidate + " -> " + checker.suggestForWord(candidate));
  }
 }
}

/**
 * Analyzes a sample word and logs each analysis in three formats: long (lexical and
 * surface), lexical only, and {@code AnalysisFormatters.OFLAZER_STYLE}.
 *
 * @param args unused
 */
public static void main(String[] args) {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 String input = "kalemi";
 Log.info("Word = " + input);
 WordAnalysis analyses = morphology.analyze(input);
 for (SingleAnalysis analysis : analyses) {
  Log.info("Lexical and Surface : " + analysis.formatLong());
  Log.info("Only Lexical        : " + analysis.formatLexical());
  Log.info("Oflazer style       : " + AnalysisFormatters.OFLAZER_STYLE.format(analysis));
  Log.info();
 }
}

/**
 * Analyzes a sample word and, for each analysis, regenerates the word with a different stem
 * (the first lexicon match for "poğaça") using the same morphemes, logging every result.
 *
 * @param args unused
 */
public static void main(String[] args) {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 // First matching lexicon item is used as the replacement stem.
 DictionaryItem replacementStem =
   morphology.getLexicon().getMatchingItems("poğaça").get(0);
 String input = "simidime";
 Log.info("Input Word = " + input);
 WordAnalysis analyses = morphology.analyze(input);
 for (SingleAnalysis analysis : analyses) {
  List<Result> regenerated =
    morphology.getWordGenerator().generate(replacementStem, analysis.getMorphemes());
  for (Result generatedWord : regenerated) {
   Log.info("Input analysis: " + analysis.formatLong());
   Log.info("After stem change, word = " + generatedWord.surface);
   Log.info("After stem change, Analysis = " + generatedWord.analysis.formatLong());
  }
 }
}

/**
 * Runs three {@code AddNewDictionaryItem.test} scenarios: two proper nouns and one verb,
 * each with a newly constructed {@code DictionaryItem}.
 *
 * @param args unused
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
 TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
 AddNewDictionaryItem app = new AddNewDictionaryItem(morphology);

 Log.info("Proper Noun Test - 1 :");
 DictionaryItem meydan = new DictionaryItem(
   "Meydan", "meydan", "meydan", PrimaryPos.Noun, SecondaryPos.ProperNoun);
 app.test("Meydan'a", meydan);
 Log.info("----");

 Log.info("Proper Noun Test - 2 :");
 DictionaryItem meeeydan = new DictionaryItem(
   "Meeeydan", "meeeydan", "meeeydan", PrimaryPos.Noun, SecondaryPos.ProperNoun);
 app.test("Meeeydan'a", meeeydan);
 Log.info("----");

 Log.info("Verb Test : ");
 DictionaryItem tweetlemek = new DictionaryItem(
   "tweetlemek", "tweetle", "tivitle", PrimaryPos.Verb, SecondaryPos.None);
 app.test("tweetleyeyazdım", tweetlemek);
}

Popular methods of TurkishMorphology

analyze
analyzeAndDisambiguate
Applies morphological analysis and disambiguation to a sentence.
builder
getLexicon
analyzeSentence
getWordGenerator
disambiguate
getMorphotactics
invalidateCache
<init>
analyzeWithCache
analyzeWithoutCache

Popular in Java

Creating JSON documents from java classes using gson
setContentView (Activity)
findViewById (Activity)
compareTo (BigDecimal)
OutputStream (java.io)
A writable sink for bytes. Most clients will use output streams that write data to the file system (
List (java.util)
An ordered collection (also known as a sequence). The user of this interface has precise control ove
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
BoxLayout (javax.swing)
Location (org.springframework.beans.factory.parsing)
Class that models an arbitrary location in a Resource. Typically used to track the location of proble
Top Sublime Text plugins

How to use the createWithDefaults method in zemberek.morphology.TurkishMorphology

Best Java code snippets using zemberek.morphology.TurkishMorphology.createWithDefaults (Showing top 20 results out of 315)

How to use
createWithDefaults
method
in
zemberek.morphology.TurkishMorphology