/**
 * Writes one scaled copy of the co-occurrence column per user.
 *
 * <p>The user IDs and preference values are read position by position from two lists, so
 * entry {@code i} of one list is paired with entry {@code i} of the other.
 *
 * @param key input key; not read by this method
 * @param vectorAndPrefsWritable supplies the column vector, the user IDs and the preference values
 * @param context receives a (user ID, column times preference value) pair for every user
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
public void map(IntWritable key,
                VectorAndPrefsWritable vectorAndPrefsWritable,
                Context context) throws IOException, InterruptedException {
  Vector column = vectorAndPrefsWritable.getVector();
  List<Long> users = vectorAndPrefsWritable.getUserIDs();
  List<Float> preferences = vectorAndPrefsWritable.getValues();

  int position = 0;
  while (position < users.size()) {
    long user = users.get(position);
    float preference = preferences.get(position);
    // Scale the whole column by this user's preference value.
    Vector scaledColumn = column.times(preference);
    context.write(new VarLongWritable(user), new VectorWritable(scaledColumn));
    position++;
  }
}
}
/**
 * Creates an instance and initialises it by passing all three arguments straight to
 * {@code set(vector, userIDs, values)}.
 *
 * @param vector the vector to hold
 * @param userIDs the user IDs to hold
 * @param values the preference values to hold
 */
public VectorAndPrefsWritable(Vector vector, List<Long> userIDs, List<Float> values) {
  this.set(vector, userIDs, values);
}
/**
 * Accepts only a {@link VectorAndPrefsWritable} that satisfies all of the following:
 * its vector has exactly one non-default element, the entry at the index derived from
 * {@code itemID} is NaN, its user ID list has as many entries as {@code userIDs}, and that
 * list contains every ID in {@code userIDs}.
 *
 * @param argument the object handed to the matcher
 * @return {@code true} if every condition above holds, {@code false} otherwise
 */
@Override
public boolean matches(Object argument) {
  if (!(argument instanceof VectorAndPrefsWritable)) {
    return false;
  }
  VectorAndPrefsWritable candidate = (VectorAndPrefsWritable) argument;
  Vector candidateVector = candidate.getVector();

  // Short-circuits exactly like the two separate checks: the NaN lookup only runs
  // when the element count is already 1.
  boolean singleNaNEntry = candidateVector.getNumNondefaultElements() == 1
      && Double.isNaN(candidateVector.get(TasteHadoopUtils.idToIndex(itemID)));
  if (!singleNaNEntry) {
    return false;
  }

  if (candidate.getUserIDs().size() != userIDs.length) {
    return false;
  }
  for (long expectedUser : userIDs) {
    boolean present = candidate.getUserIDs().contains(expectedUser);
    if (!present) {
      return false;
    }
  }
  return true;
}
/**
 * Tests {@link PartialMultiplyMapper}: for two users with preference values 1.0 and 3.0 the
 * mapper must write, per user, a {@link PrefAndSimilarityColumnWritable} holding that
 * preference value together with the similarity column.
 */
@Test
public void testPartialMultiplyMapper() throws Exception {
  // Fixture: a sparse similarity column with two entries.
  Vector similarityColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  similarityColumn.set(3, 0.5);
  similarityColumn.set(7, 0.8);

  // Expected output values, one per user.
  PrefAndSimilarityColumnWritable expectedForUser123 = new PrefAndSimilarityColumnWritable();
  expectedForUser123.set(1.0f, similarityColumn);
  PrefAndSimilarityColumnWritable expectedForUser456 = new PrefAndSimilarityColumnWritable();
  expectedForUser456.set(3.0f, similarityColumn);

  // Record the expected writes on the mock, then switch it to replay mode.
  Mapper<VarIntWritable,VectorAndPrefsWritable,VarLongWritable,PrefAndSimilarityColumnWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);
  context.write(EasyMock.eq(new VarLongWritable(123L)), EasyMock.eq(expectedForUser123));
  context.write(EasyMock.eq(new VarLongWritable(456L)), EasyMock.eq(expectedForUser456));
  EasyMock.replay(context);

  VectorAndPrefsWritable input = new VectorAndPrefsWritable(
      similarityColumn, Arrays.asList(123L, 456L), Arrays.asList(1.0f, 3.0f));
  PartialMultiplyMapper mapper = new PartialMultiplyMapper();
  mapper.map(new VarIntWritable(1), input, context);

  EasyMock.verify(context);
}
/**
 * Writes, for every user whose preference value is not NaN, that preference value together
 * with the similarity-matrix column, keyed by the user ID.
 *
 * <p>{@code userIDWritable} and {@code prefAndSimilarityColumn} are declared outside this
 * method and are overwritten before each write.
 *
 * @param key input key; not read by this method
 * @param vectorAndPrefsWritable supplies the column vector, the user IDs and the preference values
 * @param context receives one (user ID, preference + column) pair per non-NaN preference
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(VarIntWritable key,
                   VectorAndPrefsWritable vectorAndPrefsWritable,
                   Context context) throws IOException, InterruptedException {
  Vector column = vectorAndPrefsWritable.getVector();
  List<Long> users = vectorAndPrefsWritable.getUserIDs();
  List<Float> preferences = vectorAndPrefsWritable.getValues();

  for (int position = 0; position < users.size(); position++) {
    long user = users.get(position);
    float preference = preferences.get(position);
    if (Float.isNaN(preference)) {
      // NaN preference values produce no output for this user.
      continue;
    }
    prefAndSimilarityColumn.set(preference, column);
    userIDWritable.set(user);
    context.write(userIDWritable, prefAndSimilarityColumn);
  }
}
/**
 * Creates an instance and initialises it by passing all three arguments straight to
 * {@code set(vector, userIDs, values)}.
 *
 * @param vector the vector to hold
 * @param userIDs the user IDs to hold
 * @param values the preference values to hold
 */
public VectorAndPrefsWritable(Vector vector, List<Long> userIDs, List<Float> values) {
  this.set(vector, userIDs, values);
}
/**
 * Writes, for every user whose preference value is not NaN, that preference value together
 * with the similarity-matrix column, keyed by the user ID.
 *
 * <p>{@code userIDWritable} and {@code prefAndSimilarityColumn} are declared outside this
 * method and are overwritten before each write.
 *
 * @param key input key; not read by this method
 * @param vectorAndPrefsWritable supplies the column vector, the user IDs and the preference values
 * @param context receives one (user ID, preference + column) pair per non-NaN preference
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(VarIntWritable key,
                   VectorAndPrefsWritable vectorAndPrefsWritable,
                   Context context) throws IOException, InterruptedException {
  Vector column = vectorAndPrefsWritable.getVector();
  List<Long> users = vectorAndPrefsWritable.getUserIDs();
  List<Float> preferences = vectorAndPrefsWritable.getValues();

  for (int position = 0; position < users.size(); position++) {
    long user = users.get(position);
    float preference = preferences.get(position);
    if (Float.isNaN(preference)) {
      // NaN preference values produce no output for this user.
      continue;
    }
    prefAndSimilarityColumn.set(preference, column);
    userIDWritable.set(user);
    context.write(userIDWritable, prefAndSimilarityColumn);
  }
}
/**
 * Creates an instance and initialises it by passing all three arguments straight to
 * {@code set(vector, userIDs, values)}.
 *
 * @param vector the vector to hold
 * @param userIDs the user IDs to hold
 * @param values the preference values to hold
 */
public VectorAndPrefsWritable(Vector vector, List<Long> userIDs, List<Float> values) {
  this.set(vector, userIDs, values);
}
/**
 * Writes, for every user whose preference value is not NaN, that preference value together
 * with the similarity-matrix column, keyed by the user ID.
 *
 * <p>{@code userIDWritable} and {@code prefAndSimilarityColumn} are declared outside this
 * method and are overwritten before each write.
 *
 * @param key input key; not read by this method
 * @param vectorAndPrefsWritable supplies the column vector, the user IDs and the preference values
 * @param context receives one (user ID, preference + column) pair per non-NaN preference
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(VarIntWritable key,
                   VectorAndPrefsWritable vectorAndPrefsWritable,
                   Context context) throws IOException, InterruptedException {
  Vector column = vectorAndPrefsWritable.getVector();
  List<Long> users = vectorAndPrefsWritable.getUserIDs();
  List<Float> preferences = vectorAndPrefsWritable.getValues();

  for (int position = 0; position < users.size(); position++) {
    long user = users.get(position);
    float preference = preferences.get(position);
    if (Float.isNaN(preference)) {
      // NaN preference values produce no output for this user.
      continue;
    }
    prefAndSimilarityColumn.set(preference, column);
    userIDWritable.set(user);
    context.write(userIDWritable, prefAndSimilarityColumn);
  }
}
/**
 * Builds, for one item, a single-entry vector holding NaN at the item's index and writes it
 * together with all incoming user IDs, each paired with a preference value of 1.0.
 *
 * <p>{@code itemIDIndexWritable} and {@code vectorAndPrefs} are declared outside this method
 * and are overwritten before the write.
 *
 * @param itemID the item whose index is derived via {@code TasteHadoopUtils.idToIndex}
 * @param values the user IDs grouped under this item
 * @param ctx receives the (item index, vector + users + preference values) pair
 * @throws IOException propagated from {@code ctx.write}
 * @throws InterruptedException propagated from {@code ctx.write}
 */
@Override
protected void reduce(VarLongWritable itemID,
                      Iterable<VarLongWritable> values,
                      Context ctx) throws IOException, InterruptedException {
  int index = TasteHadoopUtils.idToIndex(itemID.get());

  List<Long> users = Lists.newArrayList();
  List<Float> preferences = Lists.newArrayList();
  for (VarLongWritable user : values) {
    users.add(user.get());
    preferences.add(1.0f);
  }

  /* artificial NaN summand to exclude this item from the recommendations for all users
     collected above */
  Vector exclusionMarker = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
  exclusionMarker.set(index, Double.NaN);

  itemIDIndexWritable.set(index);
  vectorAndPrefs.set(exclusionMarker, users, preferences);
  ctx.write(itemIDIndexWritable, vectorAndPrefs);
}
}
/**
 * Accepts only a {@link VectorAndPrefsWritable} whose user IDs equal {@code userIDs}, whose
 * values equal {@code prefValues}, and whose vector passes
 * {@code MathHelper.consistsOf(vector, elements)}.
 *
 * @param argument the object handed to the matcher
 * @return {@code true} if all three checks pass, {@code false} otherwise
 */
@Override
public boolean matches(Object argument) {
  if (!(argument instanceof VectorAndPrefsWritable)) {
    return false;
  }
  VectorAndPrefsWritable actual = (VectorAndPrefsWritable) argument;
  // Evaluated left to right; a failing check skips the ones after it.
  return actual.getUserIDs().equals(userIDs)
      && actual.getValues().equals(prefValues)
      && MathHelper.consistsOf(actual.getVector(), elements);
}
@Override protected void reduce(VarIntWritable key, Iterable<VectorOrPrefWritable> values, Context context) throws IOException, InterruptedException { List<Long> userIDs = Lists.newArrayList(); List<Float> prefValues = Lists.newArrayList(); Vector similarityMatrixColumn = null; for (VectorOrPrefWritable value : values) { if (value.getVector() == null) { // Then this is a user-pref value userIDs.add(value.getUserID()); prefValues.add(value.getValue()); } else { // Then this is the column vector if (similarityMatrixColumn != null) { throw new IllegalStateException("Found two similarity-matrix columns for item index " + key.get()); } similarityMatrixColumn = value.getVector(); } } if (similarityMatrixColumn == null) { return; } vectorAndPrefs.set(similarityMatrixColumn, userIDs, prefValues); context.write(key, vectorAndPrefs); }
@Override protected void reduce(VarIntWritable key, Iterable<VectorOrPrefWritable> values, Context context) throws IOException, InterruptedException { List<Long> userIDs = new ArrayList<>(); List<Float> prefValues = new ArrayList<>(); Vector similarityMatrixColumn = null; for (VectorOrPrefWritable value : values) { if (value.getVector() == null) { // Then this is a user-pref value userIDs.add(value.getUserID()); prefValues.add(value.getValue()); } else { // Then this is the column vector if (similarityMatrixColumn != null) { throw new IllegalStateException("Found two similarity-matrix columns for item index " + key.get()); } similarityMatrixColumn = value.getVector(); } } if (similarityMatrixColumn == null) { return; } vectorAndPrefs.set(similarityMatrixColumn, userIDs, prefValues); context.write(key, vectorAndPrefs); }
/**
 * Builds, for one item, a single-entry vector holding NaN at the item's index and writes it
 * together with all incoming user IDs, each paired with a preference value of 1.0.
 *
 * <p>{@code itemIDIndexWritable} and {@code vectorAndPrefs} are declared outside this method
 * and are overwritten before the write.
 *
 * @param itemID the item whose index is derived via {@code TasteHadoopUtils.idToIndex}
 * @param values the user IDs grouped under this item
 * @param ctx receives the (item index, vector + users + preference values) pair
 * @throws IOException propagated from {@code ctx.write}
 * @throws InterruptedException propagated from {@code ctx.write}
 */
@Override
protected void reduce(VarLongWritable itemID,
                      Iterable<VarLongWritable> values,
                      Context ctx) throws IOException, InterruptedException {
  int index = TasteHadoopUtils.idToIndex(itemID.get());

  List<Long> users = Lists.newArrayList();
  List<Float> preferences = Lists.newArrayList();
  for (VarLongWritable user : values) {
    users.add(user.get());
    preferences.add(1.0f);
  }

  /* artificial NaN summand to exclude this item from the recommendations for all users
     collected above */
  Vector exclusionMarker = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
  exclusionMarker.set(index, Double.NaN);

  itemIDIndexWritable.set(index);
  vectorAndPrefs.set(exclusionMarker, users, preferences);
  ctx.write(itemIDIndexWritable, vectorAndPrefs);
}
}
/**
 * Builds, for one item, a single-entry vector holding NaN at the item's index and writes it
 * together with all incoming user IDs, each paired with a preference value of 1.0.
 *
 * <p>{@code itemIDIndexWritable} and {@code vectorAndPrefs} are declared outside this method
 * and are overwritten before the write.
 *
 * @param itemID the item whose index is derived via {@code TasteHadoopUtils.idToIndex}
 * @param values the user IDs grouped under this item
 * @param ctx receives the (item index, vector + users + preference values) pair
 * @throws IOException propagated from {@code ctx.write}
 * @throws InterruptedException propagated from {@code ctx.write}
 */
@Override
protected void reduce(VarLongWritable itemID,
                      Iterable<VarLongWritable> values,
                      Context ctx) throws IOException, InterruptedException {
  int index = TasteHadoopUtils.idToIndex(itemID.get());

  List<Long> users = new ArrayList<>();
  List<Float> preferences = new ArrayList<>();
  for (VarLongWritable user : values) {
    users.add(user.get());
    preferences.add(1.0f);
  }

  /* artificial NaN summand to exclude this item from the recommendations for all users
     collected above */
  Vector exclusionMarker = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
  exclusionMarker.set(index, Double.NaN);

  itemIDIndexWritable.set(index);
  vectorAndPrefs.set(exclusionMarker, users, preferences);
  ctx.write(itemIDIndexWritable, vectorAndPrefs);
}
}
@Override protected void reduce(VarIntWritable key, Iterable<VectorOrPrefWritable> values, Context context) throws IOException, InterruptedException { List<Long> userIDs = Lists.newArrayList(); List<Float> prefValues = Lists.newArrayList(); Vector similarityMatrixColumn = null; for (VectorOrPrefWritable value : values) { if (value.getVector() == null) { // Then this is a user-pref value userIDs.add(value.getUserID()); prefValues.add(value.getValue()); } else { // Then this is the column vector if (similarityMatrixColumn != null) { throw new IllegalStateException("Found two similarity-matrix columns for item index " + key.get()); } similarityMatrixColumn = value.getVector(); } } if (similarityMatrixColumn == null) { return; } vectorAndPrefs.set(similarityMatrixColumn, userIDs, prefValues); context.write(key, vectorAndPrefs); }