/**
 * Applies morphological analysis and disambiguation to a sentence.
 *
 * <p>The sentence is first passed to {@code analyzeSentence}; the sentence and the result of that
 * call are then handed to {@code disambiguate}, whose result is returned unchanged.
 *
 * @param sentence Sentence.
 * @return SentenceAnalysis instance.
 */
public SentenceAnalysis analyzeAndDisambiguate(String sentence) {
  return disambiguate(sentence, analyzeSentence(sentence));
}
/**
 * Creates a context that keeps the given configuration.
 *
 * <p>The tokenizer is set to {@code TurkishTokenizer.ALL} and the morphology is obtained from
 * {@code TurkishMorphology.createWithDefaults()}.
 *
 * @param configuration configuration object stored on this context as-is (no validation is done
 *     here).
 */
public ZemberekContext(ZemberekGrpcConfiguration configuration) {
  tokenizer = TurkishTokenizer.ALL;
  morphology = TurkishMorphology.createWithDefaults();
  this.configuration = configuration;
}
/**
 * Creates a {@code TurkishMorphology} by passing this builder to its constructor.
 *
 * @return a new TurkishMorphology instance on every call.
 */
public TurkishMorphology build() {
  return new TurkishMorphology(this);
}
} // closes the enclosing type, whose declaration starts before this excerpt
/**
 * Prints generated surface forms for the dictionary item matching "armut".
 *
 * <p>A morphology instance is built with a lexicon containing only "armut" and with the cache
 * disabled. Every combination of the number, possessive and case morpheme ids below is passed to
 * the word generator, and the {@code surface} of each generated result is written to standard
 * output.
 *
 * @param args ignored.
 */
public static void main(String[] args) {
  TurkishMorphology morphology =
      TurkishMorphology.builder().setLexicon("armut").disableCache().build();
  // First lexicon entry matching "armut" is used as the generation root.
  DictionaryItem root = morphology.getLexicon().getMatchingItems("armut").get(0);

  String[] numberIds = {"A3sg", "A3pl"};
  String[] possessiveIds = {"P1sg", "P2sg", "P3sg"};
  String[] caseIds = {"Dat", "Loc", "Abl"};

  // Loop nesting (number, then possessive, then case) fixes the order of the printed output.
  for (String numberId : numberIds) {
    for (String possessiveId : possessiveIds) {
      for (String caseId : caseIds) {
        List<Result> generated =
            morphology.getWordGenerator().generate(root, numberId, possessiveId, caseId);
        for (Result result : generated) {
          System.out.println(result.surface);
        }
      }
    }
  }
}
/**
 * Analyzes the word "kisi" and prints every analysis to standard output.
 *
 * <p>The morphology instance is built on the default root lexicon with
 * {@code ignoreDiacriticsInAnalysis()} enabled on the builder.
 *
 * @param args ignored.
 * @throws IOException declared by the original signature.
 */
public static void main(String[] args) throws IOException {
  TurkishMorphology diacriticsIgnoringMorphology =
      TurkishMorphology.builder()
          .ignoreDiacriticsInAnalysis()
          .setLexicon(RootLexicon.getDefault())
          .build();

  diacriticsIgnoringMorphology
      .analyze("kisi")
      .forEach(analysis -> System.out.println(analysis));
}
/**
 * Logs the analyses of the word "kutucuğumuz" together with their stems and lemmas.
 *
 * <p>For each analysis returned by the default morphology, the long format of the analysis is
 * logged, followed by the values of {@code getStems()} and {@code getLemmas()}.
 *
 * @param args ignored.
 */
public static void main(String[] args) {
  TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();
  String input = "kutucuğumuz";

  Log.info("Word = " + input);
  Log.info("Results: ");

  WordAnalysis analyses = analyzer.analyze(input);
  analyses.forEach(
      analysis -> {
        Log.info(analysis.formatLong());
        Log.info("\tStems = " + analysis.getStems());
        Log.info("\tLemmas = " + analysis.getLemmas());
      });
}
/**
 * Tells whether the given word has at least one "regular" analysis.
 *
 * <p>An analysis counts as regular when it is neither unknown nor runtime-generated, and the
 * secondary part of speech of its dictionary item is neither {@code ProperNoun} nor
 * {@code Abbreviation}.
 *
 * @param s word to analyze.
 * @return true if any analysis of {@code s} satisfies all of the conditions above.
 */
boolean hasRegularAnalysis(String s) {
  WordAnalysis analyses = morphology.analyze(s);
  return analyses.stream()
      .anyMatch(
          analysis -> {
            // Unknown and runtime analyses are rejected before the dictionary item is inspected.
            if (analysis.isUnknown() || analysis.isRuntime()) {
              return false;
            }
            SecondaryPos secondaryPos = analysis.getDictionaryItem().secondaryPos;
            return secondaryPos != SecondaryPos.ProperNoun
                && secondaryPos != SecondaryPos.Abbreviation;
          });
}
/**
 * Builds a {@code TurkishMorphology} on the default root lexicon and logs how long the
 * construction took, in milliseconds.
 *
 * @return the newly built instance.
 */
public static TurkishMorphology createWithDefaults() {
  Stopwatch timer = Stopwatch.createStarted();
  TurkishMorphology morphology = new Builder().setLexicon(RootLexicon.getDefault()).build();
  // Elapsed time is read only after build() returns, so it covers lexicon loading as well.
  long elapsedMillis = timer.elapsed(TimeUnit.MILLISECONDS);
  Log.info("Initialized in %d ms.", elapsedMillis);
  return morphology;
}
/**
 * Builds a {@code TurkishMorphology} that uses the given lexicon, leaving every other builder
 * setting untouched.
 *
 * @param lexicon lexicon passed to {@code Builder.setLexicon}.
 * @return the instance produced by {@code Builder.build()}.
 */
public static TurkishMorphology create(RootLexicon lexicon) {
  return new Builder().setLexicon(lexicon).build();
}
/**
 * Handles a single-word analysis request.
 *
 * <p>The request input is analyzed with the morphology instance, the result is converted with
 * {@code toWordAnalysisProto}, sent to the observer as the only response message, and the call is
 * then completed.
 *
 * @param request request carrying the input word.
 * @param responseObserver observer that receives the converted analysis and the completion signal.
 */
@Override
public void analyzeWord(
    WordAnalysisRequest request, StreamObserver<WordAnalysisProto> responseObserver) {
  WordAnalysisProto response = toWordAnalysisProto(morphology.analyze(request.getInput()));
  responseObserver.onNext(response);
  responseObserver.onCompleted();
}
/**
 * Creates a new {@code Builder} and calls {@code setLexicon} on it with the given lexicon.
 *
 * @param lexicon lexicon to set on the builder.
 * @return whatever {@code setLexicon} returns, for further configuration before {@code build()}.
 */
public static Builder builder(RootLexicon lexicon) {
  return new Builder().setLexicon(lexicon);
}
/**
 * Analyzes the sentence and applies the configured rules to its ambiguous analyses.
 *
 * <p>The sentence is analyzed word by word through {@code analyzer}; the analyses are wrapped in
 * a {@code ResultSentence}, on which {@code makeDecisions(rules)} is then invoked.
 *
 * @param sentence sentence to disambiguate.
 * @return the {@code ResultSentence} after {@code makeDecisions} has been called on it.
 */
public ResultSentence disambiguate(String sentence) {
  ResultSentence result = new ResultSentence(sentence, analyzer.analyzeSentence(sentence));
  result.makeDecisions(rules);
  return result;
}
/**
 * Analyzes a single word, going through the cache only when {@code useCache} is set.
 *
 * @param word word to analyze.
 * @return the result of {@code analyzeWithCache} or {@code analyzeWithoutCache}, depending on
 *     {@code useCache}.
 */
public WordAnalysis analyze(String word) {
  if (useCache) {
    return analyzeWithCache(word);
  }
  return analyzeWithoutCache(word);
}
/**
 * Creates a new, unconfigured {@code Builder}.
 *
 * @return a fresh builder instance on every call.
 */
public static Builder builder() {
  return new Builder();
}
/**
 * Creates a context with {@code TurkishTokenizer.ALL} as tokenizer and a morphology obtained from
 * {@code TurkishMorphology.createWithDefaults()}. No configuration is assigned here.
 */
public ZemberekContext() {
  tokenizer = TurkishTokenizer.ALL;
  morphology = TurkishMorphology.createWithDefaults();
}
/**
 * Initializes the {@code morphology} field with {@code TurkishMorphology.createWithDefaults()}.
 */
Singleton() {
  morphology = TurkishMorphology.createWithDefaults();
}
} // closes the enclosing type, whose declaration starts before this excerpt
/**
 * Creates the instance with a default {@code TurkishMorphology} assigned to {@code parser}.
 *
 * @throws IOException declared by the signature. NOTE(review): nothing visible in this body
 *     throws it; confirm whether {@code createWithDefaults()} can in this code base.
 */
public AmbiguityStats() throws IOException {
  parser = TurkishMorphology.createWithDefaults();
}
static void saveUnambigious() throws IOException { Path goldTest = Paths.get("data/gold/gold-test.sentences"); //Path goldTest = Paths.get("data/gold/test.txt"); Path goldTestOut = Paths.get("data/gold/gold-test.txt"); TurkishMorphology morphology = TurkishMorphology.createWithDefaults(); saveUnambiguous(clean(Files.readAllLines(goldTest)), morphology, goldTestOut); }
/**
 * Runs sentence extraction against a corpus index.
 *
 * <p>A default morphology instance and an {@code AmbiguousExampleFinder} backed by a
 * {@code CorpusSearcher} over the hard-coded index directory are created and passed to
 * {@code extractSentences}.
 *
 * @param args ignored.
 * @throws Exception propagated from the searcher, the finder or the extraction.
 */
public static void main(String[] args) throws Exception {
  TurkishMorphology analyzer = TurkishMorphology.createWithDefaults();
  // Index location is machine specific.
  Path corpusIndexDir = Paths.get("/home/aaa/data/zemberek/corpus-index");
  AmbiguousExampleFinder exampleFinder =
      new AmbiguousExampleFinder(new CorpusSearcher(corpusIndexDir));
  extractSentences(analyzer, exampleFinder);
}
/**
 * Demonstrates the spell checker on two fixed word lists.
 *
 * <p>For the first list the result of {@code check} is logged per word; after an empty log call,
 * the result of {@code suggestForWord} is logged per word of the second list.
 *
 * @param args ignored.
 * @throws IOException declared by the original signature.
 */
public static void main(String[] args) throws IOException {
  TurkishSpellChecker checker = new TurkishSpellChecker(TurkishMorphology.createWithDefaults());

  Log.info("Check if written correctly.");
  String[] wordsToCheck = {"Ankara'ya", "Ankar'aya", "yapbileceksen", "yapabileceğinizden"};
  for (String candidate : wordsToCheck) {
    Log.info(candidate + " -> " + checker.check(candidate));
  }

  Log.info();
  Log.info("Give suggestions.");
  String[] suggestionInputs = {"Kraamanda", "okumuştk", "yapbileceksen", "oukyamıyorum"};
  for (String input : suggestionInputs) {
    Log.info(input + " -> " + checker.suggestForWord(input));
  }
}
} // closes the enclosing type, whose declaration starts before this excerpt