/**
 * Rebuilds a sentence by substituting a lemma for every analyzed word.
 *
 * <p>The input is passed to {@code morphology.analyzeAndDisambiguate}. For each word the best
 * analysis is inspected: when it is unknown the word's original input is kept, otherwise the
 * last entry of the analysis' lemma list is used.
 *
 * @param sentence text to analyze
 * @return the chosen words joined with single spaces
 */
private String replaceWordsWithLemma(String sentence) {
  List<String> output = new ArrayList<>();
  for (SentenceWordAnalysis wordResult : morphology.analyzeAndDisambiguate(sentence)) {
    SingleAnalysis chosen = wordResult.getBestAnalysis();
    if (chosen.isUnknown()) {
      // No usable analysis: fall back to the word exactly as it was given.
      output.add(wordResult.getWordAnalysis().getInput());
    } else {
      List<String> candidates = chosen.getLemmas();
      output.add(candidates.get(candidates.size() - 1));
    }
  }
  return String.join(" ", output);
}
/**
 * Rebuilds a sentence by substituting a lemma for every analyzed word.
 *
 * <p>The input is passed to {@code morphology.analyzeAndDisambiguate}. Words whose best analysis
 * is unknown are kept as their original input; all other words are replaced by the first entry
 * of the best analysis' lemma list.
 *
 * @param sentence text to analyze
 * @return the chosen words joined with single spaces
 */
private String replaceWordsWithLemma(String sentence) {
  SentenceAnalysis analyzed = morphology.analyzeAndDisambiguate(sentence);
  List<String> words = new ArrayList<>();
  for (SentenceWordAnalysis item : analyzed) {
    SingleAnalysis top = item.getBestAnalysis();
    // Unknown words pass through untouched; known words use their first lemma.
    String replacement = top.isUnknown()
        ? item.getWordAnalysis().getInput()
        : top.getLemmas().get(0);
    words.add(replacement);
  }
  return String.join(" ", words);
}
private String splitWords(String sentence) { List<String> tokens = Splitter.on(" ").splitToList(sentence); // assume first is label. Remove label from sentence for morphological analysis. String label = tokens.get(0); tokens = tokens.subList(1, tokens.size()); sentence = String.join(" ", tokens); if (sentence.length() == 0) { return sentence; } SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence); List<String> res = new ArrayList<>(); // add label first. res.add(label); for (SentenceWordAnalysis e : analysis) { SingleAnalysis best = e.getBestAnalysis(); if (best.isUnknown()) { res.add(e.getWordAnalysis().getInput()); continue; } List<String> lemmas = best.getLemmas(); res.add(lemmas.get(lemmas.size() - 1)); } return String.join(" ", res); }
private String replaceWordsWithLemma(String sentence) { List<String> tokens = Splitter.on(" ").splitToList(sentence); // assume first is label. Remove label from sentence for morphological analysis. String label = tokens.get(0); tokens = tokens.subList(1, tokens.size()); sentence = String.join(" ", tokens); if (sentence.length() == 0) { return sentence; } SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence); List<String> res = new ArrayList<>(); // add label first. res.add(label); for (SentenceWordAnalysis e : analysis) { SingleAnalysis best = e.getBestAnalysis(); if (best.isUnknown()) { res.add(e.getWordAnalysis().getInput()); continue; } List<String> lemmas = best.getLemmas(); res.add(lemmas.get(lemmas.size() - 1)); } return String.join(" ", res); }
protected String replaceWordsWithLemma(String sentence) { List<String> tokens = Splitter.on(" ").splitToList(sentence); // assume first is label. Remove label from sentence for morphological analysis. String label = tokens.get(0); tokens = tokens.subList(1, tokens.size()); sentence = String.join(" ", tokens); if (sentence.length() == 0) { return sentence; } SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence); List<String> res = new ArrayList<>(); // add label first. res.add(label); for (SentenceWordAnalysis e : analysis) { SingleAnalysis best = e.getBestAnalysis(); if (best.isUnknown()) { res.add(e.getWordAnalysis().getInput()); continue; } List<String> lemmas = best.getLemmas(); res.add(lemmas.get(0)); } return String.join(" ", res); }
/**
 * Handles a sentence analysis request.
 *
 * <p>Reads the input text from the request, runs it through
 * {@code morphology.analyzeAndDisambiguate}, logs the sentence, converts the analysis with
 * {@code toSentenceAnalysis} and sends the single result to the observer before completing it.
 *
 * @param request request carrying the input text and the "contain all analyses" flag
 * @param responseObserver observer that receives the converted analysis
 */
@Override
public void analyzeSentence(
    SentenceAnalysisRequest request,
    StreamObserver<SentenceAnalysisProto> responseObserver) {
  final String input = request.getInput();
  final SentenceAnalysis analyzed = morphology.analyzeAndDisambiguate(input);
  Log.info("Sentence = %s", input);
  final boolean includeAllAnalyses = request.getContainAllAnalyses();
  responseObserver.onNext(toSentenceAnalysis(analyzed, includeAllAnalyses));
  responseObserver.onCompleted();
}
return sentence; SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence); List<String> res = new ArrayList<>();
/**
 * Writes the analyses of sentences that contain no unknown words to a file.
 *
 * <p>Each sentence is analyzed with {@code morphology.analyzeAndDisambiguate}. Sentences whose
 * best analyses include an unknown one are skipped. For every remaining sentence the output
 * contains:
 * <ul>
 *   <li>a line {@code S:<sentence>};</li>
 *   <li>for each word, a line with the word input followed by one line per analysis in long
 *       format; when the word has more than one analysis, the one equal to the best analysis
 *       is suffixed with {@code *};</li>
 *   <li>a trailing blank line.</li>
 * </ul>
 *
 * @param sentences sentences to analyze
 * @param morphology analyzer used for analysis and disambiguation
 * @param out file to write, opened with the "utf-8" charset name
 * @throws IOException if the output file cannot be opened
 */
public static void saveUnambiguous(
    List<String> sentences,
    TurkishMorphology morphology,
    Path out) throws IOException {
  try (PrintWriter writer = new PrintWriter(out.toFile(), "utf-8")) {
    for (String sentence : sentences) {
      SentenceAnalysis result = morphology.analyzeAndDisambiguate(sentence);
      boolean hasUnknown = result.bestAnalysis().stream().anyMatch(SingleAnalysis::isUnknown);
      if (!hasUnknown) {
        writer.format("S:%s%n", sentence);
        for (SentenceWordAnalysis wordResult : result) {
          WordAnalysis word = wordResult.getWordAnalysis();
          writer.println(word.getInput());
          SingleAnalysis selected = wordResult.getBestAnalysis();
          // With a single analysis there is nothing to disambiguate, so no marker is written.
          boolean onlyOne = word.analysisCount() == 1;
          for (SingleAnalysis candidate : word) {
            if (onlyOne) {
              writer.println(candidate.formatLong());
            } else {
              String marker = candidate.equals(selected) ? "*" : "";
              writer.format("%s%s%n", candidate.formatLong(), marker);
            }
          }
        }
        writer.println();
      }
    }
  }
}
SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence); if (containsUnkown(analysis)) { continue;
SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(input);
for (String sentence : toProcess) { try { SentenceAnalysis sentenceAnalysis = morphology.analyzeAndDisambiguate(sentence); for (SentenceWordAnalysis analysis : sentenceAnalysis) { HashSet<String> stems = new HashSet<>(4);
for (String sentence : sentences) { List<SingleAnalysis> analysis = morphology.analyzeAndDisambiguate(sentence).bestAnalysis();
/**
 * Demonstrates informal word analysis and conversion.
 *
 * <p>Builds a {@code TurkishMorphology} with the default lexicon and informal analysis enabled,
 * prints the surface form and best analysis of each word of a fixed sentence, then prints the
 * result of passing each analysis through an {@code InformalAnalysisConverter}.
 *
 * @param args not used
 */
public static void main(String[] args) {
  TurkishMorphology morphology =
      TurkishMorphology.builder()
          .setLexicon(RootLexicon.getDefault())
          .useInformalAnalysis()
          .build();

  SentenceAnalysis sentenceResult = morphology.analyzeAndDisambiguate("okuycam diyo");
  List<SingleAnalysis> analyses = sentenceResult.bestAnalysis();
  for (SingleAnalysis analysis : analyses) {
    System.out.println(analysis.surfaceForm() + "-" + analysis);
  }

  System.out.println("Converting formal surface form:");

  InformalAnalysisConverter converter =
      new InformalAnalysisConverter(morphology.getWordGenerator());
  for (SingleAnalysis analysis : analyses) {
    String surface = analysis.surfaceForm();
    System.out.println(converter.convert(surface, analysis));
  }
}
}
/**
 * Logs the primary part of speech of each word in a fixed sample sentence.
 *
 * <p>Creates a {@code TurkishMorphology} with defaults, analyzes and disambiguates the sentence,
 * and logs each word's input together with the primary POS of its best analysis.
 *
 * @param args not used
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

  String sentence = "Keşke yarın hava güzel olsa.";
  Log.info("Sentence = " + sentence);

  SentenceAnalysis result = morphology.analyzeAndDisambiguate(sentence);
  for (SentenceWordAnalysis wordResult : result) {
    SingleAnalysis best = wordResult.getBestAnalysis();
    PrimaryPos pos = best.getPos();
    String word = wordResult.getWordAnalysis().getInput();
    Log.info("%s : %s ", word, pos);
  }
}
public void findStems(String str) { str = "<s> <s> " + str + " </s> </s>"; SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(str); List<SentenceWordAnalysis> swaList = analysis.getWordAnalyses();
private String replaceWordsWithLemma(String sentence) { List<String> tokens = Splitter.on(" ").splitToList(sentence); // assume first is label. Remove label from sentence for morphological analysis. String label = tokens.get(0); tokens = tokens.subList(1, tokens.size()); sentence = String.join(" ", tokens); SentenceAnalysis analysis = morphology.analyzeAndDisambiguate(sentence); List<String> res = new ArrayList<>(); // add label first. res.add(label); for (SentenceWordAnalysis e : analysis) { SingleAnalysis best = e.getBestAnalysis(); if (best.isUnknown()) { res.add(e.getWordAnalysis().getInput()); continue; } List<String> lemmas = best.getLemmas(); res.add(lemmas.get(lemmas.size() - 1)); } return String.join(" ", res); }
/**
 * Registers the {@code POST /find_pos} route.
 *
 * <p>The handler reads the {@code sentence} query parameter, analyzes and disambiguates it with
 * {@code morphology}, and returns a JSON list with one {@code POSResult} per word. Each result
 * carries the word input, its normalized input, and the lexical format, short POS form and
 * lexical morpheme format of the word's best analysis.
 */
public void initializeController() {
  post("/find_pos", (req, res) -> {
    String sentence = req.queryParams("sentence");
    SentenceAnalysis sentenceResult = morphology.analyzeAndDisambiguate(sentence);

    List<POSResult> payload = new ArrayList<>();
    for (SentenceWordAnalysis wordResult : sentenceResult.getWordAnalyses()) {
      SingleAnalysis best = wordResult.getBestAnalysis();

      POSResult row = new POSResult();
      // Fields taken from the word itself.
      row.input = wordResult.wordAnalysis.getInput();
      row.normalizedInput = wordResult.wordAnalysis.getNormalizedInput();
      // Fields taken from the best analysis.
      row.analysis = best.formatLexical();
      row.pos = best.getPos().shortForm;
      row.morphemesLexical = best.formatMorphemesLexical();

      payload.add(row);
    }
    return jsonConverter.toJson(payload);
  });
}
}
/**
 * Logs the primary part of speech of each word in a fixed sample sentence.
 *
 * <p>A {@code TurkishMorphology} created with defaults analyzes and disambiguates the sentence;
 * for every word, the word input and the primary POS of its best analysis are logged.
 *
 * @param args not used
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String sentence = "Keşke yarın hava güzel olsa.";
  Log.info("Sentence = " + sentence);
  for (SentenceWordAnalysis entry : morphology.analyzeAndDisambiguate(sentence)) {
    PrimaryPos posOfBest = entry.getBestAnalysis().getPos();
    String surface = entry.getWordAnalysis().getInput();
    Log.info("%s : %s ", surface, posOfBest);
  }
}
if (disambiguate) { if (deepWordAnalysis) { SentenceAnalysis results = morphology.analyzeAndDisambiguate(sentence); for (SentenceWordAnalysis wordAnalysis : results) { List<AnalyzeWordItem> analyze_list = new ArrayList<>(); List<SingleAnalysis> singleAnalyses = morphology.analyzeAndDisambiguate(sentence) .bestAnalysis(); for (SingleAnalysis analysis : singleAnalyses) {