/**
 * Constructs a pair holding the supplied elements.
 *
 * <p>Both values are stored by delegating to {@code set(left, right)}.
 *
 * @param left the integer placed in the left slot
 * @param right the string placed in the right slot
 */
public PairOfIntString(int left, String right) {
  this.set(left, right);
}
/**
 * Records the current target when the key's right element is {@code 0}; otherwise emits one
 * record per value for keys whose left element equals the recorded target title.
 *
 * <p>For a key with right element {@code 0}, the key's left element is stored in
 * {@code targetTitle} and the left element of the first value is parsed into
 * {@code targetDocid}; nothing is emitted. For any other key, nothing is emitted unless the
 * key's left element equals {@code targetTitle}. When it does, each value produces a record
 * whose key is the value's left element parsed as an int and whose value is the pair
 * ({@code targetDocid}, the value's right element).
 *
 * <p>NOTE(review): this presumably relies on the 0-tagged key being delivered before the
 * other keys that share its left element — confirm against the job's sort/partition setup.
 *
 * @param key pair whose left element is compared with the target title and whose right
 *     element selects the branch taken
 * @param values the values grouped under {@code key}
 * @param output collector receiving the emitted records
 * @param reporter not used by this method
 * @throws IOException propagated from {@code output.collect}
 * @throws NumberFormatException if a left element that is parsed is not a valid integer
 */
public void reduce(PairOfStringInt key, Iterator<PairOfStrings> values,
    OutputCollector<IntWritable, PairOfIntString> output, Reporter reporter)
    throws IOException {
  final boolean startsNewTarget = key.getRightElement() == 0;
  if (startsNewTarget) {
    // Remember the title and docid that the following keys are matched against.
    targetTitle = key.getLeftElement();
    targetDocid = Integer.parseInt(values.next().getLeftElement());
    return;
  }

  final boolean matchesTarget = key.getLeftElement().equals(targetTitle);
  if (!matchesTarget) {
    return;
  }

  // SRCID and TARGET_ANCHOR_PAIR are reused as scratch objects for every emitted record.
  while (values.hasNext()) {
    final PairOfStrings link = values.next();
    SRCID.set(Integer.parseInt(link.getLeftElement()));
    TARGET_ANCHOR_PAIR.set(targetDocid, link.getRightElement());
    output.collect(SRCID, TARGET_ANCHOR_PAIR);
  }
}
} // closes the enclosing scope, whose opening lies outside this chunk
public void map(IntWritable key, ArrayListWritable<AnchorText> anchors, OutputCollector<PairOfIntString, IntWritable> output, Reporter reporter) throws IOException { for(AnchorText data : anchors) { if(data.isURL()) try { //extract the hostname for a given URL host = new URI(data.getText()).getHost(); }catch(Exception e) { return; } } for(AnchorText data : anchors) { if(!data.isExternalOutLink()) continue; valueWord.set(key.get()); for(int target : data) { keyWord.set(target, host); output.collect(keyWord, valueWord); } } } }