@Override public void seal() { sealed = true; sortedLongList = new long[rawLongSet.size()]; rawLongSet.toArray(sortedLongList); Arrays.sort(sortedLongList); if (sortedLongList.length == 0) { min = null; max = null; return; } // Update min/max based on raw docs. min = sortedLongList[0]; max = sortedLongList[sortedLongList.length - 1]; // Merge the raw and aggregated docs, so stats for dictionary creation are collected correctly. int numAggregated = aggregatedLongSet.size(); if (numAggregated > 0) { rawLongSet.addAll(aggregatedLongSet); sortedLongList = new long[rawLongSet.size()]; rawLongSet.toArray(sortedLongList); Arrays.sort(sortedLongList); } } }
/**
 * Get every item this user has <em>seen</em>, whether in the training data or the test data.
 * The union is computed on the first call and cached in {@code seenItems}; later calls
 * return the cached set.
 *
 * @return The set of all seen items (training and test).
 */
public LongSet getSeenItems() {
    if (seenItems != null) {
        return seenItems;
    }

    // first call: start from a copy of the training items, then fold in the test items
    LongSet union = new LongOpenHashSet(getTrainItems());
    union.addAll(getTestItems());
    seenItems = union;
    return seenItems;
}
/**
 * Compute the union of two sets.
 *
 * @param a The first set.
 * @param b The second set.
 * @return A sorted set holding every element that is in <var>a</var>, in <var>b</var>, or in both.
 */
public static LongSortedSet setUnion(LongSet a, LongSet b) {
    boolean bothSorted = a instanceof LongSortedSet && b instanceof LongSortedSet;
    if (bothSorted) {
        // Re-dispatch with both arguments typed as sorted sets (overload expected to be
        // declared elsewhere in this class).
        return setUnion((LongSortedSet) a, (LongSortedSet) b);
    }

    // General case: merge through a hash set, then pack the result.
    LongSet merged = new LongOpenHashSet(a);
    merged.addAll(b);
    return packedSet(merged);
}
// Add every element of items to allItems.
allItems.addAll(items);
@Nonnull @Override public ResultMap scoreWithDetails(long user, @Nonnull Collection<Long> items) { Long2DoubleMap userRatings = rvDAO.userRatingVector(user); if (userRatings.isEmpty()) { Map<Long, Double> scores = baseline.score(user, items); return Results.newResultMap(Iterables.transform(scores.entrySet(), Results.fromEntryFunction())); } else { // score everything, both rated and not, for offsets LongSet allItems = new LongOpenHashSet(userRatings.keySet()); allItems.addAll(items); Map<Long, Double> baseScores = baseline.score(user, allItems); Long2DoubleMap offsets = new Long2DoubleOpenHashMap(); // subtract scores from ratings, yielding offsets Long2DoubleFunction bsf = LongUtils.asLong2DoubleMap(baseScores); for (Long2DoubleMap.Entry e: userRatings.long2DoubleEntrySet()) { double base = bsf.get(e.getLongKey()); offsets.put(e.getLongKey(), e.getDoubleValue() - base); } double meanOffset = Vectors.sum(offsets) / (offsets.size() + damping); // to score: fill with baselines, add user mean offset List<Result> results = new ArrayList<>(items.size()); LongIterator iter = LongIterators.asLongIterator(items.iterator()); while (iter.hasNext()) { long item = iter.nextLong(); results.add(Results.create(item, bsf.get(item) + meanOffset)); } return Results.newResultMap(results); } }
/**
 * Find the candidate neighbors for a user: every other user that the snapshot lists for at
 * least one of the probed items.
 *
 * <p>The probed items are either {@code items} or the keys of the user's normalized rating
 * vector, whichever collection is smaller. A user with no ratings has no candidates.
 *
 * @param user The user whose neighbors are wanted.
 * @param items The items of interest.
 * @return An iterable that creates a fresh {@code NeighborIterator} over the candidates on
 *         each call to {@code iterator()}; empty if the user has no ratings.
 */
@Override
public Iterable<Neighbor> getCandidateNeighbors(final long user, LongSet items) {
    Long2DoubleMap ratings = rvDAO.userRatingVector(user);
    if (ratings.isEmpty()) {
        return Collections.emptyList();
    }

    final Long2DoubleMap normed = similarityNormalizer.makeTransformation(user, ratings)
                                                      .apply(ratings);
    assert normed != null;

    // probe whichever collection is smaller
    LongCollection probe = normed.size() < items.size() ? normed.keySet() : items;

    final LongSet candidates = new LongOpenHashSet();
    LongIterator itemIter = probe.iterator();
    while (itemIter.hasNext()) {
        LongSet raters = snapshot.getItemUsers(itemIter.nextLong());
        if (raters != null) {
            candidates.addAll(raters);
        }
    }
    // a user is never their own neighbor
    candidates.remove(user);

    logger.debug("Found {} candidate neighbors for user {}", candidates.size(), user);
    return new Iterable<Neighbor>() {
        @Override
        public Iterator<Neighbor> iterator() {
            return new NeighborIterator(user, normed, candidates);
        }
    };
}
/**
 * Get the IDs of the candidate neighbors for a user.
 *
 * <p>Walks the smaller of {@code userItems} and {@code targetItems} (the target items on a
 * tie) and collects the user IDs of all ratings the DAO returns for each item. The user
 * itself is removed from the result.
 *
 * @param user The user.
 * @param userItems The user's rated items.
 * @param targetItems The set of target items.
 * @return The set of IDs of candidate neighbors.
 */
private LongSet findCandidateNeighbors(long user, LongSet userItems, LongCollection targetItems) {
    LongSet neighbors = new LongOpenHashSet(100);

    // iterate whichever item collection is smaller to keep the number of queries down
    LongIterator itemIter = userItems.size() < targetItems.size()
            ? userItems.iterator()
            : targetItems.iterator();

    for (; itemIter.hasNext();) {
        LongSet raters = dao.query(CommonTypes.RATING)
                            .withAttribute(CommonAttributes.ITEM_ID, itemIter.nextLong())
                            .valueSet(CommonAttributes.USER_ID);
        if (raters != null) {
            neighbors.addAll(raters);
        }
    }

    neighbors.remove(user);
    return neighbors;
}
/**
 * Blend two ranked result lists into one by combining rank-derived scores.
 *
 * <p>Every item appearing in either list gets the score
 * {@code weight * rankToScore(leftRank, left.size()) + (1 - weight) * rankToScore(rightRank, right.size())}.
 * A rank below zero is treated as "item not in that list": the corresponding original result
 * is passed as {@code null}.
 *
 * @param n The size passed to {@code ResultAccumulator.create} (presumably the maximum number
 *          of results to keep).
 * @param left The left result list.
 * @param right The right result list.
 * @param weight The weight given to the left list's score; the right list gets {@code 1 - weight}.
 * @return The accumulated blended results.
 */
static ResultList merge(int n, ResultList left, ResultList right, double weight) {
    Long2IntMap leftRanks = LongUtils.itemRanks(LongUtils.asLongList(left.idList()));
    Long2IntMap rightRanks = LongUtils.itemRanks(LongUtils.asLongList(right.idList()));
    int nl = left.size();
    int nr = right.size();

    // every item that occurs in at least one of the two lists
    LongSet allItems = new LongOpenHashSet();
    allItems.addAll(leftRanks.keySet());
    allItems.addAll(rightRanks.keySet());

    ResultAccumulator accum = ResultAccumulator.create(n);
    for (LongIterator iter = allItems.iterator(); iter.hasNext();) {
        long item = iter.nextLong();
        int rl = leftRanks.get(item);
        int rr = rightRanks.get(item);
        double s1 = rankToScore(rl, nl);
        double s2 = rankToScore(rr, nr);
        double score = weight * s1 + (1.0 - weight) * s2;
        // Each side contributes its own (result, rank) pair; the right-hand pair must carry
        // the right rank (rr), not the left one.
        accum.add(new RankBlendResult(item, score,
                                      rl >= 0 ? left.get(rl) : null, rl,
                                      rr >= 0 ? right.get(rr) : null, rr));
    }
    return accum.finish();
}
// Add every element of xs to mergedExclude.
mergedExclude.addAll(xs);
/**
 * Compute the union of two sets.
 *
 * @param a The first set.
 * @param b The second set.
 * @return A sorted set holding every element that is in <var>a</var>, in <var>b</var>, or in both.
 */
public static LongSortedSet setUnion(LongSet a, LongSet b) {
    boolean bothSorted = a instanceof LongSortedSet && b instanceof LongSortedSet;
    if (bothSorted) {
        // Re-dispatch with both arguments typed as sorted sets (overload expected to be
        // declared elsewhere in this class).
        return setUnion((LongSortedSet) a, (LongSortedSet) b);
    }

    // General case: merge through a hash set, then pack the result.
    LongSet merged = new LongOpenHashSet(a);
    merged.addAll(b);
    return packedSet(merged);
}
/**
 * Build a new column containing each distinct value of this column once.
 *
 * @return A new column named {@code name() + " Unique values"} whose contents are this
 *         column's data with duplicates removed.
 */
@Override
public LongColumn unique() {
    // passing the data through a set drops repeated values
    LongSet distinct = new LongArraySet();
    distinct.addAll(data);

    String label = name() + " Unique values";
    return LongColumn.create(label, new LongArrayList(distinct));
}
/**
 * Get the IDs of all items present in the training data, the test data, or both.
 * A new set is built on every call.
 *
 * @return The union of the training and test item IDs.
 */
public LongSet getAllItems() {
    // seed with a copy of the training item IDs ...
    LongSet union = new LongOpenHashSet(trainData.getItemDAO().getItemIds());
    // ... then fold in the test item IDs
    union.addAll(testData.getItemDAO().getItemIds());
    return union;
}
// Build the user's vector from the user and the event DAO via makeUserVector.
SparseVector ratings = makeUserVector(user, userEventDao);
// Start from a copy of that vector's keys, then add items, so allItems ends up
// holding the union of both.
LongSet allItems = new LongOpenHashSet(ratings.keySet());
allItems.addAll(items);
@Nonnull @Override public ResultMap scoreWithDetails(long user, @Nonnull Collection<Long> items) { Long2DoubleMap userRatings = rvDAO.userRatingVector(user); if (userRatings.isEmpty()) { Map<Long, Double> scores = baseline.score(user, items); return Results.newResultMap(Iterables.transform(scores.entrySet(), Results.fromEntryFunction())); } else { // score everything, both rated and not, for offsets LongSet allItems = new LongOpenHashSet(userRatings.keySet()); allItems.addAll(items); Map<Long, Double> baseScores = baseline.score(user, allItems); Long2DoubleMap offsets = new Long2DoubleOpenHashMap(); // subtract scores from ratings, yielding offsets Long2DoubleFunction bsf = LongUtils.asLong2DoubleMap(baseScores); for (Long2DoubleMap.Entry e: userRatings.long2DoubleEntrySet()) { double base = bsf.get(e.getLongKey()); offsets.put(e.getLongKey(), e.getDoubleValue() - base); } double meanOffset = Vectors.sum(offsets) / (offsets.size() + damping); // to score: fill with baselines, add user mean offset List<Result> results = new ArrayList<>(items.size()); LongIterator iter = LongIterators.asLongIterator(items.iterator()); while (iter.hasNext()) { long item = iter.nextLong(); results.add(Results.create(item, bsf.get(item) + meanOffset)); } return Results.newResultMap(results); } }
/**
 * Find the candidate neighbors for a user: every other user that the snapshot lists for at
 * least one of the probed items.
 *
 * <p>The probed items are either {@code items} or the keys of the user's normalized rating
 * vector, whichever collection is smaller. A user with no ratings has no candidates.
 *
 * @param user The user whose neighbors are wanted.
 * @param items The items of interest.
 * @return An iterable that creates a fresh {@code NeighborIterator} over the candidates on
 *         each call to {@code iterator()}; empty if the user has no ratings.
 */
@Override
public Iterable<Neighbor> getCandidateNeighbors(final long user, LongSet items) {
    Long2DoubleMap ratings = rvDAO.userRatingVector(user);
    if (ratings.isEmpty()) {
        return Collections.emptyList();
    }

    final Long2DoubleMap normed = similarityNormalizer.makeTransformation(user, ratings)
                                                      .apply(ratings);
    assert normed != null;

    // probe whichever collection is smaller
    LongCollection probe = normed.size() < items.size() ? normed.keySet() : items;

    final LongSet candidates = new LongOpenHashSet();
    LongIterator itemIter = probe.iterator();
    while (itemIter.hasNext()) {
        LongSet raters = snapshot.getItemUsers(itemIter.nextLong());
        if (raters != null) {
            candidates.addAll(raters);
        }
    }
    // a user is never their own neighbor
    candidates.remove(user);

    logger.debug("Found {} candidate neighbors for user {}", candidates.size(), user);
    return new Iterable<Neighbor>() {
        @Override
        public Iterator<Neighbor> iterator() {
            return new NeighborIterator(user, normed, candidates);
        }
    };
}
/**
 * Get the IDs of the candidate neighbors for a user.
 *
 * <p>Walks the smaller of {@code userItems} and {@code targetItems} (the target items on a
 * tie) and collects the user IDs of all ratings the DAO returns for each item. The user
 * itself is removed from the result.
 *
 * @param user The user.
 * @param userItems The user's rated items.
 * @param targetItems The set of target items.
 * @return The set of IDs of candidate neighbors.
 */
private LongSet findCandidateNeighbors(long user, LongSet userItems, LongCollection targetItems) {
    LongSet neighbors = new LongOpenHashSet(100);

    // iterate whichever item collection is smaller to keep the number of queries down
    LongIterator itemIter = userItems.size() < targetItems.size()
            ? userItems.iterator()
            : targetItems.iterator();

    for (; itemIter.hasNext();) {
        LongSet raters = dao.query(CommonTypes.RATING)
                            .withAttribute(CommonAttributes.ITEM_ID, itemIter.nextLong())
                            .valueSet(CommonAttributes.USER_ID);
        if (raters != null) {
            neighbors.addAll(raters);
        }
    }

    neighbors.remove(user);
    return neighbors;
}
/**
 * Blend two ranked result lists into one by combining rank-derived scores.
 *
 * <p>Every item appearing in either list gets the score
 * {@code weight * rankToScore(leftRank, left.size()) + (1 - weight) * rankToScore(rightRank, right.size())}.
 * A rank below zero is treated as "item not in that list": the corresponding original result
 * is passed as {@code null}.
 *
 * @param n The size passed to {@code ResultAccumulator.create} (presumably the maximum number
 *          of results to keep).
 * @param left The left result list.
 * @param right The right result list.
 * @param weight The weight given to the left list's score; the right list gets {@code 1 - weight}.
 * @return The accumulated blended results.
 */
static ResultList merge(int n, ResultList left, ResultList right, double weight) {
    Long2IntMap leftRanks = LongUtils.itemRanks(LongUtils.asLongList(left.idList()));
    Long2IntMap rightRanks = LongUtils.itemRanks(LongUtils.asLongList(right.idList()));
    int nl = left.size();
    int nr = right.size();

    // every item that occurs in at least one of the two lists
    LongSet allItems = new LongOpenHashSet();
    allItems.addAll(leftRanks.keySet());
    allItems.addAll(rightRanks.keySet());

    ResultAccumulator accum = ResultAccumulator.create(n);
    for (LongIterator iter = allItems.iterator(); iter.hasNext();) {
        long item = iter.nextLong();
        int rl = leftRanks.get(item);
        int rr = rightRanks.get(item);
        double s1 = rankToScore(rl, nl);
        double s2 = rankToScore(rr, nr);
        double score = weight * s1 + (1.0 - weight) * s2;
        // Each side contributes its own (result, rank) pair; the right-hand pair must carry
        // the right rank (rr), not the left one.
        accum.add(new RankBlendResult(item, score,
                                      rl >= 0 ? left.get(rl) : null, rl,
                                      rr >= 0 ? right.get(rr) : null, rr));
    }
    return accum.finish();
}
// Add every element of xs to mergedExclude.
mergedExclude.addAll(xs);