/**
 * Checks two pairs for equality.
 *
 * <p>Two pairs are equal when this pair's left element equals the other pair's left element and
 * this pair's right element equals the other pair's right element.
 *
 * @param obj object for comparison; may be {@code null} or of an unrelated type
 * @return {@code true} if {@code obj} is a {@code PairOfStrings} whose elements are equal to this
 *     pair's elements, {@code false} otherwise (including when {@code obj} is {@code null})
 */
@Override
public boolean equals(Object obj) {
  // instanceof is false for null, so this single guard covers both null and foreign types
  // and keeps equals() from throwing, as the Object.equals contract requires.
  if (!(obj instanceof PairOfStrings)) {
    return false;
  }
  PairOfStrings pair = (PairOfStrings) obj;
  return leftElement.equals(pair.getLeftElement())
      && rightElement.equals(pair.getRightElement());
}
/**
 * Builds a pair from the two supplied elements by handing them to {@code set}.
 *
 * @param left the left element
 * @param right the right element
 */
public PairOfStrings(String left, String right) {
  this.set(left, right);
}
/**
 * Produces a copy of this pair.
 *
 * @return a newly constructed pair built from this pair's left and right elements
 */
public PairOfStrings clone() {
  PairOfStrings copy = new PairOfStrings(leftElement, rightElement);
  return copy;
}
/**
 * Picks a partition from the hash of the key's left element only; the right element and the
 * value are not consulted.
 *
 * @param key pair whose left element determines the partition
 * @param value unused
 * @param numReduceTasks number of partitions to choose from
 * @return an index in the range {@code [0, numReduceTasks)}
 */
@Override
public int getPartition(PairOfStrings key, FloatWritable value, int numReduceTasks) {
  // Clearing the sign bit keeps the remainder non-negative.
  int nonNegativeHash = key.getLeftElement().hashCode() & Integer.MAX_VALUE;
  return nonNegativeHash % numReduceTasks;
}
}
/**
 * Sums all values for the key and writes one output record.
 *
 * <p>When the key's right element is {@code "*"}, the sum itself is written and remembered in
 * {@code marginal}. For any other key the sum is divided by the current {@code marginal} before
 * being written. NOTE(review): this presumably relies on the {@code "*"} key for a given left
 * element reaching this reducer before that element's other keys; confirm against the job's
 * sort order and partitioner.
 *
 * @param key the pair being reduced
 * @param values the partial values to add up
 * @param context sink for the output record
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
public void reduce(PairOfStrings key, Iterable<FloatWritable> values, Context context)
    throws IOException, InterruptedException {
  float total = 0.0f;
  for (FloatWritable partial : values) {
    total += partial.get();
  }

  boolean isMarginalKey = key.getRightElement().equals("*");
  VALUE.set(isMarginalKey ? total : total / marginal);
  context.write(key, VALUE);

  // Only update the stored marginal after the record has been written.
  if (isMarginalKey) {
    marginal = total;
  }
}
}
// Orders entries by their right element in reverse (e2 compared against e1); entries whose
// right elements compare as equal are ordered by their left element in natural order.
public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1,
    PairOfWritables<PairOfStrings, FloatWritable> e2) {
  boolean sameValue = e1.getRightElement().compareTo(e2.getRightElement()) == 0;
  return sameValue
      ? e1.getLeftElement().compareTo(e2.getLeftElement())
      : e2.getRightElement().compareTo(e1.getRightElement());
}
});
/**
 * Assigns a key to a partition using only the hash code of its left element.
 *
 * @param key pair whose left element is hashed
 * @param value unused
 * @param numReduceTasks number of partitions to choose from
 * @return an index in the range {@code [0, numReduceTasks)}
 */
@Override
public int getPartition(PairOfStrings key, IntWritable value, int numReduceTasks) {
  int hash = key.getLeftElement().hashCode();
  // Drop the sign bit so the modulo result cannot be negative.
  int masked = hash & Integer.MAX_VALUE;
  return masked % numReduceTasks;
}
}
/**
 * Adds up the values for the key and emits a single record.
 *
 * <p>A key whose right element is {@code "*"} is emitted with the raw sum, which is then stored
 * in {@code marginal}. Every other key is emitted with its sum divided by the stored
 * {@code marginal}. NOTE(review): correctness presumably depends on the {@code "*"} key being
 * reduced before the other keys sharing its left element; verify against the job configuration.
 *
 * @param key the pair being reduced
 * @param values the partial values to add up
 * @param context sink for the output record
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
public void reduce(PairOfStrings key, Iterable<FloatWritable> values, Context context)
    throws IOException, InterruptedException {
  float total = 0.0f;
  for (FloatWritable count : values) {
    total += count.get();
  }

  if (!key.getRightElement().equals("*")) {
    // Ordinary key: emit the sum relative to the remembered marginal.
    VALUE.set(total / marginal);
    context.write(key, VALUE);
    return;
  }

  // Marginal key: emit the sum as-is and remember it for the keys that follow.
  VALUE.set(total);
  context.write(key, VALUE);
  marginal = total;
}
}
// Sorts by the right element in reverse order; when the right elements compare as equal,
// falls back to the natural order of the left elements.
public int compare(PairOfWritables<PairOfStrings, FloatWritable> e1,
    PairOfWritables<PairOfStrings, FloatWritable> e2) {
  if (e1.getRightElement().compareTo(e2.getRightElement()) != 0) {
    return e2.getRightElement().compareTo(e1.getRightElement());
  }
  return e1.getLeftElement().compareTo(e2.getLeftElement());
}
});
/**
 * Defines the natural ordering of pairs: the left elements are compared first, and the right
 * elements are compared only when the left elements are equal.
 *
 * @param pair the pair to compare this pair against
 * @return a value less than zero, greater than zero, or zero when this pair sorts before,
 *     sorts after, or is equal to {@code pair}, respectively
 */
public int compareTo(PairOfStrings pair) {
  String otherLeft = pair.getLeftElement();
  String otherRight = pair.getRightElement();

  return leftElement.equals(otherLeft)
      ? rightElement.compareTo(otherRight)
      : leftElement.compareTo(otherLeft);
}
/**
 * Tokenizes the input line and, for every pair of adjacent tokens, emits two records with the
 * value {@code ONE}: one keyed by (previous token, current token) and one keyed by
 * (previous token, {@code "*"}). Lines with fewer than two tokens emit nothing.
 *
 * @param key unused
 * @param value the line of text to tokenize
 * @param context sink for the emitted records
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  List<String> tokens = Tokenizer.tokenize(value.toString());
  int count = tokens.size();

  // The loop body never runs when there are fewer than two tokens.
  for (int left = 0; left + 1 < count; left++) {
    String first = tokens.get(left);
    String second = tokens.get(left + 1);

    BIGRAM.set(first, second);
    context.write(BIGRAM, ONE);

    BIGRAM.set(first, "*");
    context.write(BIGRAM, ONE);
  }
}
}
/**
 * Maps a key to a partition based solely on its left element's hash code.
 *
 * @param key pair whose left element is hashed
 * @param value unused
 * @param numReduceTasks number of partitions to choose from
 * @return an index in the range {@code [0, numReduceTasks)}
 */
@Override
public int getPartition(PairOfStrings key, FloatWritable value, int numReduceTasks) {
  final int leftHash = key.getLeftElement().hashCode();
  // The mask removes the sign bit, so the remainder below is never negative.
  final int bucket = (leftHash & Integer.MAX_VALUE) % numReduceTasks;
  return bucket;
}
}
/**
 * Static factory that constructs a new pair from the given elements.
 *
 * @param left the left element
 * @param right the right element
 * @return a newly constructed pair
 */
public static PairOfStrings of(String left, String right) {
  PairOfStrings created = new PairOfStrings(left, right);
  return created;
}
}
lld+= Double.parseDouble(iter.next().getRightElement()); tot++; PairOfStrings now = iter.next(); double w = Double.parseDouble(now.getRightElement()); double x = Double.parseDouble(now.getLeftElement()); sum += w; mu += x * w;
/**
 * Tokenizes the input line and emits, with the value {@code ONE}, a pair
 * (token, neighbor) for every other token position lying within {@code window} positions of
 * the token on either side.
 *
 * @param key unused
 * @param value the line of text to tokenize
 * @param context sink for the emitted records
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  List<String> tokens = Tokenizer.tokenize(value.toString());
  int size = tokens.size();

  for (int center = 0; center < size; center++) {
    // Clamp the neighborhood to the bounds of the token list.
    int from = Math.max(center - window, 0);
    int to = Math.min(center + window + 1, size);

    for (int other = from; other < to; other++) {
      if (other != center) {
        PAIR.set(tokens.get(center), tokens.get(other));
        context.write(PAIR, ONE);
      }
    }
  }
}
}
/**
 * Selects the partition for a key from the hash code of its left element; the right element
 * and the value play no part.
 *
 * @param key pair whose left element is hashed
 * @param value unused
 * @param numReduceTasks number of partitions to choose from
 * @return an index in the range {@code [0, numReduceTasks)}
 */
@Override
public int getPartition(PairOfStrings key, IntWritable value, int numReduceTasks) {
  // Mask off the sign bit first so that the modulo cannot yield a negative index.
  int unsignedHash = Integer.MAX_VALUE & key.getLeftElement().hashCode();
  int partition = unsignedHash % numReduceTasks;
  return partition;
}
}
// Handles two kinds of keys, distinguished by the key's right element.
//
// Right element == 0: records the key's left element as targetTitle and parses the left
// element of the first value as targetDocid. Nothing is emitted.
//
// Any other right element: if the key's left element matches the recorded targetTitle, each
// value is emitted as (parsed left element of the value) -> (targetDocid, right element of the
// value). Keys whose left element does not match targetTitle are dropped.
//
// NOTE(review): this presumably relies on the 0-keyed record for a title being reduced before
// the other records for that title -- confirm against the job's sort/grouping setup.
public void reduce(PairOfStringInt key, Iterator<PairOfStrings> values,
    OutputCollector<IntWritable, PairOfIntString> output, Reporter reporter) throws IOException {
  if (key.getRightElement() == 0) {
    targetTitle = key.getLeftElement();
    targetDocid = Integer.parseInt(values.next().getLeftElement());
    return;
  }

  if (!key.getLeftElement().equals(targetTitle)) {
    return;
  }

  while (values.hasNext()) {
    PairOfStrings sourceAndAnchor = values.next();
    SRCID.set(Integer.parseInt(sourceAndAnchor.getLeftElement()));
    TARGET_ANCHOR_PAIR.set(targetDocid, sourceAndAnchor.getRightElement());
    output.collect(SRCID, TARGET_ANCHOR_PAIR);
  }
}
}
@Override public void map(LongWritable key, Text line, Context context) throws IOException, InterruptedException { String text = line.toString(); String[] terms = text.split("\\s+"); for (int i = 0; i < terms.length; i++) { String term = terms[i]; // skip empty tokens if (term.length() == 0) continue; for (int j = i - window; j < i + window + 1; j++) { if (j == i || j < 0) continue; if (j >= terms.length) break; // skip empty tokens if (terms[j].length() == 0) continue; PAIR.set(term, terms[j]); context.write(PAIR, ONE); } } } }
PairOfStrings bigram = p.getLeftElement(); if (bigram.getLeftElement().equals("light")) { list1.add(p); if (bigram.getLeftElement().equals("contain")) { list2.add(p);
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String prev = null; StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { String cur = itr.nextToken(); // Emit only if we have an actual bigram. if (prev != null) { // Simple way to truncate tokens that are too long. if (cur.length() > 100) { cur = cur.substring(0, 100); } if (prev.length() > 100) { prev = prev.substring(0, 100); } BIGRAM.set(prev, cur); context.write(BIGRAM, ONE); BIGRAM.set(prev, "*"); context.write(BIGRAM, ONE); } prev = cur; } } }