// Seals this accumulator: snapshots the raw values into a sorted array and records
// min/max. Statement order matters: min/max are taken from the raw docs BEFORE the
// aggregated docs are merged in, then the combined set is re-sorted for dictionary stats.
@Override public void seal() {
    sealed = true;
    sortedLongList = new long[rawLongSet.size()];
    rawLongSet.toArray(sortedLongList);
    Arrays.sort(sortedLongList);
    if (sortedLongList.length == 0) {
        // No raw values collected: min/max stay unset.
        min = null;
        max = null;
        return;
    }
    // Update min/max based on raw docs.
    min = sortedLongList[0];
    max = sortedLongList[sortedLongList.length - 1];
    // Merge the raw and aggregated docs, so stats for dictionary creation are collected correctly.
    // NOTE(review): this mutates rawLongSet in place; min/max deliberately reflect raw docs only.
    int numAggregated = aggregatedLongSet.size();
    if (numAggregated > 0) {
        rawLongSet.addAll(aggregatedLongSet);
        sortedLongList = new long[rawLongSet.size()];
        rawLongSet.toArray(sortedLongList);
        Arrays.sort(sortedLongList);
    }
}
}
/**
 * Counts the number of distinct long values held in this column's backing data.
 *
 * @return the number of unique values
 */
@Override
public int countUnique() {
    // Presize to the column length so the hash set never rehashes.
    LongSet distinct = new LongOpenHashSet(data.size());
    for (long value : data) {
        distinct.add(value);
    }
    return distinct.size();
}
/**
 * Counts the number of distinct non-missing values in this column.
 *
 * @return the number of unique values, ignoring missing-value sentinels
 */
@Override
public int countUnique() {
    LongSet uniqueElements = new LongOpenHashSet();
    // Hoist size() out of the loop and read each element once instead of
    // calling getLong(i) twice per iteration as the original did.
    int n = size();
    for (int i = 0; i < n; i++) {
        long value = getLong(i);
        if (!isMissingValue(value)) {
            uniqueElements.add(value);
        }
    }
    return uniqueElements.size();
}
/**
 * Builds a new column containing only the distinct values of this column.
 *
 * @return a column holding the unique values, named after this column
 */
@Override
public DateTimeColumn unique() {
    LongSet distinct = new LongOpenHashSet(data.size());
    for (long packed : data) {
        distinct.add(packed);
    }
    DateTimeColumn result = emptyCopy(distinct.size());
    result.setName(name() + " Unique values");
    result.data = LongArrayList.wrap(distinct.toLongArray());
    return result;
}
// The container's size is the number of keys in the backing key set.
@Override public int size() {
    return keySet.size();
}
}
// Delegates to the backing key set; size is the number of keys.
@Override public int size() {
    return keySet.size();
}
/**
 * Assigns each entity to one of {@code np} partitions, round-robin over a randomly
 * shuffled ordering, so partition sizes differ by at most one.
 *
 * @param entities the entities to split
 * @param np the number of partitions
 * @param rng the random number generator used for shuffling
 * @return a map from entity ID to its partition number
 */
@Override
public Long2IntMap splitEntities(LongSet entities, int np, Random rng) {
    Long2IntMap assignments = new Long2IntOpenHashMap(entities.size());
    logger.info("Splitting {} entities into {} partitions", entities.size(), np);
    long[] shuffled = entities.toLongArray();
    LongArrays.shuffle(shuffled, rng);
    int position = 0;
    for (long entity : shuffled) {
        assignments.put(entity, position % np);
        position++;
    }
    return assignments;
}
/**
 * Resolves the effective candidate set for a user: fills in defaults for missing
 * candidate/exclude sets and removes the excluded items.
 *
 * @param user the user ID
 * @param candidates the candidate items, or {@code null} for the user's predictable items
 * @param exclude the items to exclude, or {@code null} for the user's default excludes
 * @return the candidates with the excluded items removed
 */
private LongSet getEffectiveCandidates(long user, LongSet candidates, LongSet exclude) {
    LongSet effective = (candidates != null) ? candidates : getPredictableItems(user);
    LongSet excluded = (exclude != null) ? exclude : getDefaultExcludes(user);
    logger.debug("computing effective candidates for user {} from {} candidates and {} excluded items",
                 user, effective.size(), excluded.size());
    if (excluded.isEmpty()) {
        return effective;
    }
    return LongUtils.setDifference(effective, excluded);
}
/**
 * Splits entities into {@code np} disjoint random samples of {@code sampleSize}
 * entities each. Entities not selected for any sample are omitted from the map.
 * Falls back to plain partitioning when there are not enough entities.
 *
 * @param entities the entities to split
 * @param np the number of disjoint samples
 * @param rng the random number generator used for shuffling
 * @return a map from entity ID to its sample number
 */
@Override
public Long2IntMap splitEntities(LongSet entities, int np, Random rng) {
    // Promote to long so np * sampleSize cannot overflow int before the comparison.
    if ((long) np * sampleSize > entities.size()) {
        logger.warn("cannot make {} disjoint samples of {} from {} entities, partitioning",
                    np, sampleSize, entities.size());
        return partition().splitEntities(entities, np, rng);
    } else {
        Long2IntMap emap = new Long2IntOpenHashMap(entities.size());
        logger.info("Sampling {} entities into {} disjoint samples of {}",
                    entities.size(), np, sampleSize);
        long[] earray = entities.toLongArray();
        LongArrays.shuffle(earray, rng);
        for (int p = 0; p < np; p++) {
            for (int i = 0; i < sampleSize; i++) {
                long u = earray[p * sampleSize + i];
                emap.put(u, p);
            }
        }
        return emap;
    }
}
/**
 * Implement recommendation by calling {@link ItemScorer#scoreWithDetails(long, Collection)} and sorting
 * the results. This method uses {@link #getDefaultExcludes(long)} to get the default
 * exclude set for the user, if none is provided.
 */
@Override
protected ResultList recommendWithDetails(long user, int n, LongSet candidates, LongSet exclude) {
    LongSet effective = getEffectiveCandidates(user, candidates, exclude);
    logger.debug("Computing {} recommendations for user {} from {} candidates",
                 n, user, effective.size());
    ResultMap scored = scorer.scoreWithDetails(user, effective);
    return getTopNResults(n, scored);
}
@Nonnull @Override public MetricResult measureUserRecList(Recommender rec, TestUser user, int targetLength, List<Long> recs, Context context) { int tp = 0; LongSet items = goodItems.selectItems(context.universe, rec, user); for (long item: recs) { if(items.contains(item)) { tp += 1; } } if (items.size() > 0 && recs.size() > 0) { // if both the items set and recommendations are non-empty (no division by 0). double precision = (double) tp / recs.size(); double recall = (double) tp / items.size(); context.addUser(precision, recall); return new PresRecResult(precision, recall).withSuffix(suffix); } else { context.addUser(0, 0); return new PresRecResult(0, 0).withSuffix(suffix); } }
/**
 * Compute the intersection of two sets.
 *
 * @param a The first set.
 * @param b The second set.
 * @return The elements present in both sets.
 */
public static LongSortedSet setIntersect(LongSet a, LongSet b) {
    if (a instanceof LongSortedSet && b instanceof LongSortedSet) {
        // Both sorted: delegate to the sorted-set overload.
        return setIntersect((LongSortedSet) a, (LongSortedSet) b);
    }
    if (a.size() > b.size()) {
        // Always iterate the smaller set and probe the larger one.
        return setIntersect(b, a);
    }
    LongArrayList common = new LongArrayList(Math.min(a.size(), b.size()));
    for (LongIterator it = a.iterator(); it.hasNext();) {
        long value = it.nextLong();
        if (b.contains(value)) {
            common.add(value);
        }
    }
    return LongUtils.packedSet(common);
}
logger.info("wrote {} item IDs", items.size());
/**
 * Finds candidate neighbors for a user over the given target items, yielding them
 * lazily from the user's normalized rating vector.
 *
 * @param user the user to find neighbors for
 * @param items the target items
 * @return an iterable over candidate neighbors (empty if the user has no ratings)
 */
@Override
public Iterable<Neighbor> getCandidateNeighbors(final long user, LongSet items) {
    Long2DoubleMap ratings = rvDAO.userRatingVector(user);
    if (ratings.isEmpty()) {
        // A user with no ratings has no neighbors.
        return Collections.emptyList();
    }
    // Normalize the rating vector before any similarity computation.
    final Long2DoubleMap normalized =
            similarityNormalizer.makeTransformation(user, ratings).apply(ratings);
    final LongSet candidates = findCandidateNeighbors(user, normalized.keySet(), items);
    logger.debug("found {} candidate neighbors for {}", candidates.size(), user);
    return new Iterable<Neighbor>() {
        @Override
        public Iterator<Neighbor> iterator() {
            return new NeighborIterator(user, normalized, candidates);
        }
    };
}
/** * Compute the set difference of two sets. * * @param items The initial set * @param exclude The items to remove * @return The elements of <var>items</var> that are not in <var>exclude</var>. */ public static LongSortedSet setDifference(LongSet items, LongSet exclude) { long[] data = new long[items.size()]; final LongIterator iter = items.iterator(); int i = 0; while (iter.hasNext()) { final long x = iter.nextLong(); if (!exclude.contains(x)) { data[i++] = x; } } if (!(items instanceof LongSortedSet)) { Arrays.sort(data, 0, i); } // trim the array if (data.length * 2 > i * 3) { data = Arrays.copyOf(data, i); } return SortedKeyIndex.wrap(data, i).keySet(); }
/**
 * Creates an accumulator to process rating data and generate the necessary data for
 * a {@code SlopeOneItemScorer}.
 *
 * @param damping A damping term for deviation calculations.
 * @param items The set of known item IDs.
 */
public SlopeOneModelDataAccumulator(double damping, LongSet items) {
    this.damping = damping;
    workMatrix = new Long2ObjectOpenHashMap<>(items.size());
    // Seed the work matrix with an empty (deviation, count) pair for every known item.
    for (LongIterator it = items.iterator(); it.hasNext();) {
        long item = it.nextLong();
        workMatrix.put(item,
                       Pair.<Long2DoubleMap, Long2IntMap>of(new Long2DoubleOpenHashMap(),
                                                            new Long2IntOpenHashMap()));
    }
}
/**
 * Implement recommendation by calling {@link ItemScorer#score(long, Collection)} and sorting
 * the results by score. This method uses {@link #getDefaultExcludes(long)} to get the default
 * exclude set for the user, if none is provided.
 */
@Override
protected List<Long> recommend(long user, int n, LongSet candidates, LongSet exclude) {
    LongSet effective = getEffectiveCandidates(user, candidates, exclude);
    logger.debug("Computing {} recommendations for user {} from {} candidates",
                 n, user, effective.size());
    Map<Long, Double> scores = scorer.score(user, effective);
    // A negative n means "no limit" on the result size.
    Long2DoubleAccumulator accumulator =
            (n >= 0) ? new TopNLong2DoubleAccumulator(n)
                     : new UnlimitedLong2DoubleAccumulator();
    Long2DoubleMap scoreMap = LongUtils.asLong2DoubleMap(scores);
    for (Long2DoubleMap.Entry entry : Vectors.fastEntries(scoreMap)) {
        accumulator.put(entry.getLongKey(), entry.getDoubleValue());
    }
    return accumulator.finishList();
}
Preconditions.checkNotNull(user, "item set"); Long2ObjectOpenHashMap<SortedListAccumulator<Neighbor>> heaps = new Long2ObjectOpenHashMap<>(items.size()); for (LongIterator iter = items.iterator(); iter.hasNext();) { long item = iter.nextLong();
/**
 * Finds candidate neighbors for a user: every other user who rated any of the
 * relevant items, yielded lazily from the user's normalized rating vector.
 *
 * @param user the user to find neighbors for
 * @param items the target items
 * @return an iterable over candidate neighbors (empty if the user has no ratings)
 */
@Override
public Iterable<Neighbor> getCandidateNeighbors(final long user, LongSet items) {
    Long2DoubleMap ratings = rvDAO.userRatingVector(user);
    if (ratings.isEmpty()) {
        return Collections.emptyList();
    }
    final Long2DoubleMap normalized =
            similarityNormalizer.makeTransformation(user, ratings).apply(ratings);
    assert normalized != null;
    // Scan whichever item set is smaller: the user's rated items or the requested items.
    LongCollection querySet = (normalized.size() < items.size()) ? normalized.keySet() : items;
    final LongSet candidates = new LongOpenHashSet();
    for (LongIterator it = querySet.iterator(); it.hasNext();) {
        long item = it.nextLong();
        LongSet raters = snapshot.getItemUsers(item);
        if (raters != null) {
            candidates.addAll(raters);
        }
    }
    // A user is never their own neighbor.
    candidates.remove(user);
    logger.debug("Found {} candidate neighbors for user {}", candidates.size(), user);
    return new Iterable<Neighbor>() {
        @Override
        public Iterator<Neighbor> iterator() {
            return new NeighborIterator(user, normalized, candidates);
        }
    };
}
/**
 * Get the IDs of the candidate neighbors for a user.
 * @param user The user.
 * @param userItems The user's rated items.
 * @param targetItems The set of target items.
 * @return The set of IDs of candidate neighbors.
 */
private LongSet findCandidateNeighbors(long user, LongSet userItems, LongCollection targetItems) {
    LongSet neighbors = new LongOpenHashSet(100);
    // Iterate whichever item set is smaller to minimize DAO queries.
    LongIterator itemIter = (userItems.size() < targetItems.size())
            ? userItems.iterator()
            : targetItems.iterator();
    while (itemIter.hasNext()) {
        LongSet raters = dao.query(CommonTypes.RATING)
                            .withAttribute(CommonAttributes.ITEM_ID, itemIter.nextLong())
                            .valueSet(CommonAttributes.USER_ID);
        if (raters != null) {
            neighbors.addAll(raters);
        }
    }
    // The user is never a candidate neighbor of themselves.
    neighbors.remove(user);
    return neighbors;
}