/**
 * Creates a copy of this frequency set: every key of the backing map is
 * inserted into a fresh instance together with its current count.
 *
 * @return a new {@code FrequencyHashSet<K>} holding the same key/count pairs
 */
@Override
public FrequencyHashSet<K> clone() {
    FrequencyHashSet<K> ret = new FrequencyHashSet<K>();
    // Walk the entries once instead of keySet() + get(k), which performs a
    // redundant hash lookup for every key.
    support.forEach(ret::add);
    return ret;
}
subToken = subToken.replaceAll("^[^+]*\\+", ""); formToFeats.putIfAbsent(subToken, new FrequencyHashSet<>()); formToFeats.get(subToken).add(fea); formToForms.putIfAbsent(subToken, new HashMap<>()); formToForms.get(subToken).putIfAbsent(form, new FrequencyHashSet<>()); formToForms.get(subToken).get(form).add(fea); System.out.println(feats); if (formToFeats.get(feats) != null) { String mostFrequent = formToFeats.get(feats).mostFrequent(); System.out.println("Most frequent: " + mostFrequent); System.out.println("Frequency: " + formToFeats.get(feats).get(mostFrequent) + "/" + formToFeats.get(feats).sum()); System.out.println(formToFeats.get(feats)); System.out.println(formToForms.get(feats));
public static Integer getHead(VerbMultiToken verb, SemanticGraph semanticGraph) { FrequencyHashSet<Integer> frequencies = new FrequencyHashSet<>(); Set<Integer> indexes = new HashSet<>(); try { IndexedWord node = semanticGraph.getNodeByIndex(index); frequencies.add(index); List<IndexedWord> pathToRoot = semanticGraph.getPathToRoot(node); for (IndexedWord indexedWord : pathToRoot) { frequencies.add(indexedWord.index()); for (Integer key : frequencies.keySet()) { keys.add(key); frequencies.remove(index); return frequencies.mostFrequent();
FrequencyHashSet<String> formMeanings = new FrequencyHashSet<>(); formMeanings.add(form); for (String form : formMeanings.keySet()) { if (formMeanings.get(form) == 1) { buffer.add(form); ArrayList<String[]> run = runner.run(toFstan); FrequencyHashSet<String> fstanForms = new FrequencyHashSet<>(); fstanForms.add(suffix); if (fstanForms.size() == 0) { LOGGER.warn("No forms for: {}", type); continue; String mf = fstanForms.mostFrequent();
FrequencyHashSet<String> frequecies = new FrequencyHashSet<>(); frequecies.add(form, frequency); for (Map.Entry<String, Integer> entry : frequecies.getSorted()) { String form = entry.getKey();
public Token guess(String token, String pos) { String reverse_sample_query = new StringBuilder(token).reverse().toString(); FrequencyHashSet<String> values = new FrequencyHashSet<>(); Iterable<LinkedList<String>> closestForms = trees.get(pos).getValuesForClosestKeys(reverse_sample_query); for (LinkedList<String> s : closestForms) { values.add(s.get(2)); String guess = values.mostFrequent(); String guessed_lemma = token; for (LinkedList<String> closestForm : closestForms) {
/**
 * Adds every element of the given collection to this set, incrementing each
 * element's occurrence count by one (via {@code add(K)}).
 *
 * @param collection elements to record; must not be {@code null}
 */
public void addAll(Collection<K> collection) {
    collection.forEach(this::add);
}
/**
 * Command-line entry point: loads an OntoNotes folder, accumulates VerbNet and
 * FrameNet frequency totals, and writes them to the output file as
 * tab-separated rows of the form {@code VN|FN <TAB> key <TAB> count}.
 *
 * @param args command-line arguments; {@code -n/--ontonotes} existing input
 *             folder, {@code -o/--output} output file
 */
public static void main(String[] args) {
    try {
        final CommandLine cmd = CommandLine
                .parser()
                .withName("VerbNetStatisticsExtractor")
                .withHeader("Extracts statistics from OntoNotes on frequency of VerbNet/FrameNet")
                .withOption("n", "ontonotes", "OntoNotes folder", "FOLDER",
                        CommandLine.Type.DIRECTORY_EXISTING, true, false, true)
                .withOption("o", "output", "output file", "FILE",
                        CommandLine.Type.FILE, true, false, true)
                .withLogger(LoggerFactory.getLogger("eu.fbk.nafview")).parse(args);

        final File dir = cmd.getOptionValue("n", File.class);
        final File output = cmd.getOptionValue("o", File.class);

        VerbNetStatisticsExtractor statisticsExtractor = new VerbNetStatisticsExtractor();
        statisticsExtractor.loadDir(dir.getAbsolutePath());
        try {
            statisticsExtractor.loadFrequencies();
        } catch (Exception e) {
            // Best-effort: a failure to load pre-computed frequencies should
            // not abort the export of whatever was accumulated from loadDir().
            e.printStackTrace();
        }

        // try-with-resources: the original leaked the writer when any append
        // between construction and close() threw.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(output))) {
            for (String key : statisticsExtractor.getVnTotals().keySet()) {
                writer.append("VN").append("\t").append(key).append("\t")
                        .append(statisticsExtractor.getVnTotals().get(key).toString()).append("\n");
            }
            for (String key : statisticsExtractor.getFnTotals().keySet()) {
                writer.append("FN").append("\t").append(key).append("\t")
                        .append(statisticsExtractor.getFnTotals().get(key).toString()).append("\n");
            }
        }
    } catch (final Throwable ex) {
        CommandLine.fail(ex);
    }
}
subToken = subToken.replaceAll("^[^+]*\\+", ""); formToFeats.putIfAbsent(subToken, new FrequencyHashSet<>()); formToFeats.get(subToken).add(fea); formToForms.putIfAbsent(subToken, new HashMap<>()); formToForms.get(subToken).putIfAbsent(form, new FrequencyHashSet<>()); formToForms.get(subToken).get(form).add(fea); System.out.println(feats); if (formToFeats.get(feats) != null) { String mostFrequent = formToFeats.get(feats).mostFrequent(); System.out.println("Most frequent: " + mostFrequent); System.out.println("Frequency: " + formToFeats.get(feats).get(mostFrequent) + "/" + formToFeats.get(feats).sum()); System.out.println(formToFeats.get(feats)); System.out.println(formToForms.get(feats));
public static Integer getHead(VerbMultiToken verb, SemanticGraph semanticGraph) { FrequencyHashSet<Integer> frequencies = new FrequencyHashSet<>(); Set<Integer> indexes = new HashSet<>(); try { IndexedWord node = semanticGraph.getNodeByIndex(index); frequencies.add(index); List<IndexedWord> pathToRoot = semanticGraph.getPathToRoot(node); for (IndexedWord indexedWord : pathToRoot) { frequencies.add(indexedWord.index()); for (Integer key : frequencies.keySet()) { keys.add(key); frequencies.remove(index); return frequencies.mostFrequent();
FrequencyHashSet<String> frequecies = new FrequencyHashSet<>(); frequecies.add(form, frequency); for (Map.Entry<String, Integer> entry : frequecies.getSorted()) { String form = entry.getKey();
public Token guess(String token, String pos) { String reverse_sample_query = new StringBuilder(token).reverse().toString(); FrequencyHashSet<String> values = new FrequencyHashSet<>(); Iterable<LinkedList<String>> closestForms = trees.get(pos).getValuesForClosestKeys(reverse_sample_query); for (LinkedList<String> s : closestForms) { values.add(s.get(2)); String guess = values.mostFrequent(); String guessed_lemma = token; for (LinkedList<String> closestForm : closestForms) {
/**
 * Records a single occurrence of {@code o} by delegating to
 * {@code add(K, int)} with a count of one.
 *
 * @param o element whose count is incremented
 */
public void add(K o) {
    this.add(o, 1);
}
fileIterator = FileUtils.iterateFiles(nafFolder, new String[]{"naf"}, false); FrequencyHashSet influenceRet = new FrequencyHashSet(); FrequencyHashSet influenceRev = new FrequencyHashSet(); FrequencyHashSet goodFor = new FrequencyHashSet(); FrequencyHashSet badFor = new FrequencyHashSet(); Opinion.OpinionExpression expression = opinion.getOpinionExpression(); if (expression.getPolarity().equals("reverse")) { influenceRev.add(spanToLemmas(expression.getSpan())); influenceRet.add(spanToLemmas(expression.getSpan())); Opinion.OpinionExpression expression = opinion.getOpinionExpression(); if (expression.getPolarity().equals("goodfor")) { goodFor.add(spanToLemmas(expression.getSpan())); badFor.add(spanToLemmas(expression.getSpan())); System.out.println(influenceRet.getSorted()); System.out.println(influenceRev.getSorted()); System.out.println(goodFor.getSorted()); System.out.println(badFor.getSorted()); } catch (final Throwable ex) { CommandLine.fail(ex);
/**
 * Loads VerbNet ("VN") and FrameNet ("FN") frequency totals from a
 * whitespace-separated file whose rows have the form {@code PREFIX KEY COUNT}.
 * Blank lines, comment lines starting with {@code #}, and rows with fewer
 * than three columns are skipped. Any previously loaded totals are replaced.
 *
 * @param fileName path of the frequency file to read
 * @throws IOException if the file cannot be read
 */
public void loadFrequencies(String fileName) throws IOException {
    vnTotals = new FrequencyHashSet<>();
    fnTotals = new FrequencyHashSet<>();

    for (String rawLine : Files.readLines(new File(fileName), Charset.defaultCharset())) {
        String line = rawLine.trim();
        // Skip blanks and comment lines.
        if (line.isEmpty() || line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\\s+");
        if (parts.length < 3) {
            continue;
        }
        // The two prefixes are mutually exclusive, so an else-if chain is
        // equivalent to the original pair of independent ifs.
        if (parts[0].equals("FN")) {
            fnTotals.add(parts[1], Integer.parseInt(parts[2]));
        } else if (parts[0].equals("VN")) {
            vnTotals.add(parts[1], Integer.parseInt(parts[2]));
        }
    }
}
vnTotals = new FrequencyHashSet<>(); fnTotals = new FrequencyHashSet<>(); if (vnMappings.get(key) != null) { for (String vn : vnMappings.get(key)) { vnTotals.add(vn); fnTotals.add(fn);