/** * Gets the best super sense for a word. * * @param pos the pos * @param word the word * @return the best super sense */ public Optional<String> getBestSuperSense(POS pos, String word) { final Optional<IndexWord> indexWord = lookupWord(pos, word); if (!indexWord.isPresent()) { return Optional.empty(); } else { final List<Synset> senses = indexWord.get().getSenses(); if (senses.isEmpty()) { return Optional.empty(); } else { // At this stage we could do something clever, look at the gloss to see is there are // word overlaps // but we opt for a more predicatable concept of selecting the most commonly used // meaning sense. return Optional.of(stripPOSFromSupersense(senses.get(0).getLexFileName())); } } }
/**
 * Looks a word up in the dictionary, allowing the dictionary to lemmatise it if required.
 *
 * <p>A {@link JWNLException} from the dictionary is reported to the monitor as a warning and
 * treated as "not found".
 *
 * @param pos the part of speech
 * @param word the word, not necessarily in its base form
 * @return the WordNet word, or empty if there is no match or the lookup failed
 */
public Optional<IndexWord> lookupWord(final POS pos, final String word) {
  IndexWord match;
  try {
    match = dictionary.lookupIndexWord(pos, word);
  } catch (final JWNLException failure) {
    getMonitor().warn("Lookup word failed", failure);
    match = null;
  }
  return Optional.ofNullable(match);
}
/** Checks that every lookup style reports "nothing found" for a token that is not a word. */
@Test
public void testMissingLookupWord() throws JWNLException {
  final String nonsense = "ascasdcscz";

  final Optional<IndexWord> lookedUp = wnr.lookupWord(POS.VERB, nonsense);
  Assert.assertFalse(lookedUp.isPresent());

  final Optional<IndexWord> exact = wnr.getWord(POS.VERB, nonsense);
  Assert.assertFalse(exact.isPresent());

  final long superSenseCount = wnr.getSuperSenses(POS.VERB, nonsense).count();
  Assert.assertEquals(0, superSenseCount);

  final Optional<String> bestSuperSense = wnr.getBestSuperSense(POS.VERB, nonsense);
  Assert.assertFalse(bestSuperSense.isPresent());
}
}
/** Checks that the best super sense of the verb "know" is "cognition". */
@Test
public void testBestSuperSense() throws JWNLException {
  final Optional<String> bestSuperSense = wnr.getBestSuperSense(POS.VERB, "know");
  final String actual = bestSuperSense.get();
  Assert.assertEquals("cognition", actual);
}
@Override protected void execute(JobSettings settings) throws AnalysisEngineProcessException { dictionary = wordnet.getDictionary(); try (CsvInteractionWriter writer = new CsvInteractionWriter(outputFilename)) { final CsvInteractionReader reader = new CsvInteractionReader(inputFilename); writer.initialise(); reader.read( (i, a) -> { final Set<String> alternatives = getAlternativeWords(i.getWord()) .map(s -> s.trim().toLowerCase()) // We don't want any small words, they are too commons .filter(s -> s.length() > 2) // We don't want any phrases .filter(s -> s.indexOf(' ') == -1) .collect(Collectors.toSet()); // Add in whatever the user provided alternatives.addAll(a); writeRow(writer, i, alternatives); }); getMonitor().info("Interaction enhacement complete and written to {}", outputFilename); } catch (final IOException e) { throw new AnalysisEngineProcessException(e); } }
/** Creates and initialises a fresh {@code SharedWordNetResource} before each test. */
@Before
public void before() throws ResourceInitializationException {
  final ExternalResourceDescription description =
      ExternalResourceFactory.createExternalResourceDescription(
          "wordnet", SharedWordNetResource.class);

  wnr = new SharedWordNetResource();
  wnr.initialize(description.getResourceSpecifier(), Collections.emptyMap());
}
/** Checks that looking up the inflected verb "employing" yields the lemma "employ". */
@Test
public void testLookupWord() throws JWNLException {
  final Optional<IndexWord> lookedUp = wnr.lookupWord(POS.VERB, "employing");
  final IndexWord indexWord = lookedUp.get();
  Assert.assertEquals("employ", indexWord.getLemma());
}
/**
 * Closes the WordNet dictionary and drops the reference to it.
 *
 * <p>Safe to call when no dictionary is held (never assigned, or already destroyed): in that
 * case only the superclass clean-up runs. A failure to close is logged as a warning rather than
 * propagated, and the reference is cleared either way.
 */
@Override
protected void doDestroy() {
  super.doDestroy();

  // Nothing to close; without this guard a repeated destroy would throw a NullPointerException.
  if (dictionary == null) {
    return;
  }

  try {
    dictionary.close();
  } catch (final JWNLException e) {
    getLogger().warn("WordNet dictionary did not close cleanly", e);
  } finally {
    dictionary = null;
  }
}
/** Smoke test: calls destroy() on the resource; the test passes if no exception is thrown. */ @Test public void testDestory() { wnr.destroy(); }
/**
 * Checks that "consumption" and "social" each appear among the first two super senses returned
 * for the verb "employs", in either order.
 */
@Test
public void testSuperSense() throws JWNLException {
  final List<String> superSenses =
      wnr.getSuperSenses(POS.VERB, "employs").collect(Collectors.toList());

  final String first = superSenses.get(0);
  final String second = superSenses.get(1);

  Assert.assertTrue("consumption".equals(first) || "consumption".equals(second));
  Assert.assertTrue("social".equals(first) || "social".equals(second));
}
/**
 * Checks that the exact lookup finds nothing for the inflected form "employs" but does find the
 * base form "employ".
 */
@Test
public void testGetWord() throws JWNLException {
  final Optional<IndexWord> inflected = wnr.getWord(POS.VERB, "employs");
  Assert.assertFalse(inflected.isPresent());

  final Optional<IndexWord> exact = wnr.getWord(POS.VERB, "employ");
  final IndexWord indexWord = exact.get();
  Assert.assertNotNull(indexWord);
  Assert.assertEquals("employ", indexWord.getLemma());
}
wordnet.getBestSuperSense(interaction.getWord().getPos(), lemma).orElse(lemma);
@Override protected void execute(JobSettings settings) throws AnalysisEngineProcessException { dictionary = wordnet.getDictionary(); try (CsvInteractionWriter writer = new CsvInteractionWriter(outputFilename)) { final CsvInteractionReader reader = new CsvInteractionReader(inputFilename); writer.initialise(); reader.read( (i, a) -> { final Set<String> alternatives = getAlternativeWords(i.getWord()) .map(s -> s.trim().toLowerCase()) // We don't want any small words, they are too commons .filter(s -> s.length() > 2) // We don't want any phrases .filter(s -> s.indexOf(' ') == -1) .collect(Collectors.toSet()); // Add in whatever the user provided alternatives.addAll(a); writeRow(writer, i, alternatives); }); getMonitor().info("Interaction enhacement complete and written to {}", outputFilename); } catch (final IOException e) { throw new AnalysisEngineProcessException(e); } }
/**
 * Releases the WordNet dictionary held by this resource.
 *
 * <p>If no dictionary is held (it was never assigned, or destroy has already run) only the
 * superclass clean-up is performed. A {@link JWNLException} on close is logged as a warning and
 * not rethrown; the reference is cleared regardless.
 */
@Override
protected void doDestroy() {
  super.doDestroy();

  // Guard against a missing dictionary: calling close() on null would throw a
  // NullPointerException, e.g. on a second destroy.
  if (dictionary == null) {
    return;
  }

  try {
    dictionary.close();
  } catch (final JWNLException e) {
    getLogger().warn("WordNet dictionary did not close cleanly", e);
  } finally {
    dictionary = null;
  }
}
/** * Gets the super senses of a word. * * <p>The supersense is the original 'sense file' in which word was defined. * * @param pos the pos * @param word the word * @return the super senses */ public Stream<String> getSuperSenses(POS pos, String word) { final Optional<IndexWord> indexWord = lookupWord(pos, word); if (!indexWord.isPresent()) { return Stream.empty(); } else { // NOTE: This was stream but it WordNet getSenses() somehow seems incompatible with // streams final List<Synset> senses = indexWord.get().getSenses(); final Set<String> set = new HashSet<>(); for (final Synset s : senses) { set.add(stripPOSFromSupersense(s.getLexFileName())); } return set.stream(); } }
wordnet.getBestSuperSense(interaction.getWord().getPos(), lemma).orElse(lemma);
/** Checks that the initialised resource exposes a non-null dictionary. */ @Test public void testGetDictionary() { Assert.assertNotNull(wnr.getDictionary()); }
/**
 * Fetches the dictionary entry for an exact lemma.
 *
 * <p>A {@link JWNLException} from the dictionary is reported to the monitor as a warning and
 * treated as "not found".
 *
 * @param pos the part of speech
 * @param lemma the lemma to fetch
 * @return the WordNet word, or empty if there is no such entry or the dictionary call failed
 */
public Optional<IndexWord> getWord(final POS pos, final String lemma) {
  IndexWord entry = null;
  try {
    entry = dictionary.getIndexWord(pos, lemma);
  } catch (final JWNLException failure) {
    getMonitor().warn("Get word failed", failure);
  }
  return Optional.ofNullable(entry);
}
/** * Gets the super senses of a word. * * <p>The supersense is the original 'sense file' in which word was defined. * * @param pos the pos * @param word the word * @return the super senses */ public Stream<String> getSuperSenses(POS pos, String word) { final Optional<IndexWord> indexWord = lookupWord(pos, word); if (!indexWord.isPresent()) { return Stream.empty(); } else { // NOTE: This was stream but it WordNet getSenses() somehow seems incompatible with // streams final List<Synset> senses = indexWord.get().getSenses(); final Set<String> set = new HashSet<>(); for (final Synset s : senses) { set.add(stripPOSFromSupersense(s.getLexFileName())); } return set.stream(); } }
/**
 * Finds a word in the dictionary, letting the dictionary lemmatise the input where needed.
 *
 * <p>If the dictionary throws a {@link JWNLException}, a warning is sent to the monitor and an
 * empty result is returned.
 *
 * @param pos the part of speech
 * @param word the word, possibly inflected
 * @return the WordNet word, or empty when nothing matches or the lookup fails
 */
public Optional<IndexWord> lookupWord(final POS pos, final String word) {
  IndexWord resolved = null;
  try {
    resolved = dictionary.lookupIndexWord(pos, word);
  } catch (final JWNLException lookupFailure) {
    getMonitor().warn("Lookup word failed", lookupFailure);
  }
  return Optional.ofNullable(resolved);
}