/**
 * Loads the user/item filter file into a map keyed by user ID.
 *
 * <p>Every line is split with {@code SEPARATOR}; the first two tokens are parsed as a user ID and
 * an item ID and handed to {@code addUserAndItemIdToUserItemFilter}. A line that has fewer than two
 * tokens, or whose IDs are not parseable as longs, is logged at WARN level and skipped so that a
 * single malformed line does not abort the whole load.
 *
 * @param pathString location of the filter file, passed to {@code openFile}
 * @return the populated map; empty when no line could be used
 * @throws IOException if the file cannot be opened or read
 */
private Map<Long, FastIDSet> readUserItemFilter(String pathString) throws IOException {
  Map<Long, FastIDSet> result = new HashMap<>();
  try (InputStream in = openFile(pathString)) {
    for (String line : new FileLineIterable(in)) {
      String[] tokens = SEPARATOR.split(line);
      if (tokens.length < 2) {
        // Without this guard tokens[1] would throw ArrayIndexOutOfBoundsException, which the
        // NumberFormatException handler below does not cover.
        log.warn("userItemFile line ignored: {}", line);
        continue;
      }
      try {
        long userId = Long.parseLong(tokens[0]);
        long itemId = Long.parseLong(tokens[1]);
        addUserAndItemIdToUserItemFilter(result, userId, itemId);
      } catch (NumberFormatException nfe) {
        log.warn("userItemFile line ignored: {}", line);
      }
    }
  }
  return result;
}
/**
 * Loads the user/item filter file into a map keyed by user ID.
 *
 * <p>Every line is split with {@code SEPARATOR}; the first two tokens are parsed as a user ID and
 * an item ID and handed to {@code addUserAndItemIdToUserItemFilter}. A line that has fewer than two
 * tokens, or whose IDs are not parseable as longs, is logged at WARN level and skipped so that a
 * single malformed line does not abort the whole load.
 *
 * @param pathString location of the filter file, passed to {@code openFile}
 * @return the populated map; empty when no line could be used
 * @throws IOException if the file cannot be opened or read
 */
private Map<Long, FastIDSet> readUserItemFilter(String pathString) throws IOException {
  Map<Long, FastIDSet> result = Maps.newHashMap();
  InputStream in = openFile(pathString);
  try {
    for (String line : new FileLineIterable(in)) {
      String[] tokens = SEPARATOR.split(line);
      if (tokens.length < 2) {
        // Without this guard tokens[1] would throw ArrayIndexOutOfBoundsException, which the
        // NumberFormatException handler below does not cover.
        log.warn("userItemFile line ignored: {}", line);
        continue;
      }
      try {
        long userId = Long.parseLong(tokens[0]);
        long itemId = Long.parseLong(tokens[1]);
        addUserAndItemIdToUserItemFilter(result, userId, itemId);
      } catch (NumberFormatException nfe) {
        log.warn("userItemFile line ignored: {}", line);
      }
    }
  } finally {
    // The stream is only read from, so a failure while closing is deliberately swallowed.
    Closeables.close(in, true);
  }
  return result;
}
/**
 * Reads the job configuration: the cap on preferences considered per user and, when the
 * {@code USERS_FILE} property is set, the IDs listed in that file (one long per line).
 *
 * <p>Lines that do not parse as a long are logged at WARN level and skipped. When the property is
 * absent, {@code usersToRecommendFor} is not assigned by this method.
 *
 * @param context task context supplying the configuration
 * @throws IOException if the users file cannot be opened or read
 */
@Override
protected void setup(Context context) throws IOException {
  Configuration configuration = context.getConfiguration();
  maxPrefsPerUserConsidered =
      configuration.getInt(MAX_PREFS_PER_USER_CONSIDERED, DEFAULT_MAX_PREFS_PER_USER_CONSIDERED);

  String usersFilePathString = configuration.get(USERS_FILE);
  if (usersFilePathString == null) {
    return;
  }

  FSDataInputStream usersStream = null;
  try {
    Path rawUsersPath = new Path(usersFilePathString);
    FileSystem fileSystem = FileSystem.get(rawUsersPath.toUri(), configuration);
    usersToRecommendFor = new FastIDSet();
    // Qualify the path against the file system that owns it before opening.
    usersStream = fileSystem.open(rawUsersPath.makeQualified(fileSystem));
    for (String userLine : new FileLineIterable(usersStream)) {
      try {
        usersToRecommendFor.add(Long.parseLong(userLine));
      } catch (NumberFormatException nfe) {
        log.warn("usersFile line ignored: {}", userLine);
      }
    }
  } finally {
    // Handles the case where opening failed and the stream is still null.
    Closeables.close(usersStream, true);
  }
}
/**
 * Handles one entry of the directory walk.
 *
 * <p>A directory is listed with a new {@code SequenceFilesFromCsvFilter} whose prefix is extended
 * by the current path name. Any other entry is read line by line: each line is split with
 * {@code TAB}, and the columns at {@code keyColumn} and {@code valueColumn} are written to
 * {@code writer} as {@code prefix + key} and value.
 *
 * <p>The input stream is opened in a try-with-resources block so it is closed even when splitting
 * or writing a line fails.
 *
 * @param fst status of the entry being visited
 * @param current path whose name extends the prefix when recursing
 * @throws IOException if listing, reading or writing fails
 */
@Override
protected void process(FileStatus fst, Path current) throws IOException {
  if (fst.isDir()) {
    fs.listStatus(fst.getPath(),
        new SequenceFilesFromCsvFilter(conf, prefix + Path.SEPARATOR + current.getName(), this.options, writer));
  } else {
    try (InputStream in = fs.open(fst.getPath())) {
      for (CharSequence aFit : new FileLineIterable(in, charset, false)) {
        String[] columns = TAB.split(aFit);
        String key = columns[keyColumn];
        String value = columns[valueColumn];
        log.info("key : {}, value : {}", key, value);
        writer.write(prefix + key, value);
      }
    }
  }
}
}
/**
 * Builds a fresh mapping from the lines of {@code dataFile}: each line is stored under the key
 * returned by {@code toLongID} for that line.
 *
 * <p>After all lines have been read, {@code lastModified} is set to the file's current
 * modification time.
 *
 * @return the newly built mapping
 * @throws IOException if the data file cannot be read
 */
private FastByIDMap<String> buildMapping() throws IOException {
  FastByIDMap<String> idToString = new FastByIDMap<String>();
  FileLineIterable lines = new FileLineIterable(dataFile);
  for (String stringID : lines) {
    long longID = toLongID(stringID);
    idToString.put(longID, stringID);
  }
  // Only reached when the whole file was read without an exception.
  lastModified = dataFile.lastModified();
  return idToString;
}
/**
 * Handles one entry of the directory walk.
 *
 * <p>A directory is listed with a new {@code PrefixAdditionFilter} whose prefix is extended by the
 * current path name. Any other entry is read completely, with each line terminated by
 * {@code '\n'}, and written to {@code writer} under a key built from the prefix and the entry name.
 *
 * @param fst status of the entry being visited
 * @param current path whose name contributes to the key
 * @throws IOException if listing, reading or writing fails
 */
@Override
protected void process(FileStatus fst, Path current) throws IOException {
  if (fst.isDir()) {
    Path directory = fst.getPath();
    String nestedPrefix = prefix + Path.SEPARATOR + current.getName();
    fs.listStatus(directory, new PrefixAdditionFilter(conf, nestedPrefix, options, writer));
    return;
  }

  InputStream in = null;
  try {
    in = fs.open(fst.getPath());
    StringBuilder contents = new StringBuilder();
    for (String textLine : new FileLineIterable(in, charset, false)) {
      contents.append(textLine);
      contents.append('\n');
    }

    String currentName = current.getName();
    String entryName = fst.getPath().getName();
    String name;
    if (currentName.equals(entryName)) {
      // Both names agree, so the name is used once rather than repeated.
      name = currentName;
    } else {
      name = currentName + Path.SEPARATOR + entryName;
    }
    writer.write(prefix + Path.SEPARATOR + name, contents.toString());
  } finally {
    IOUtils.closeStream(in);
  }
}
}
/**
 * Reads {@code dataFile} line by line and returns a map in which every line is registered under
 * the long ID that {@code toLongID} yields for it.
 *
 * <p>Once the file has been consumed, {@code lastModified} is updated from the file's
 * modification timestamp.
 *
 * @return a new map holding one entry per distinct long ID
 * @throws IOException if the data file cannot be read
 */
private FastByIDMap<String> buildMapping() throws IOException {
  FastByIDMap<String> longToString = new FastByIDMap<String>();
  for (String original : new FileLineIterable(dataFile)) {
    long key = toLongID(original);
    longToString.put(key, original);
  }
  // Timestamp is captured after the read, so a failed read leaves the old value in place.
  long modifiedAt = dataFile.lastModified();
  lastModified = modifiedAt;
  return longToString;
}
/**
 * Reads the job configuration: number of recommendations per user, the boolean-data flag, the
 * item ID index map, and, when the {@code ITEMS_FILE} property is set, the IDs listed in that file
 * (one long per line).
 *
 * <p>Lines that do not parse as a long are logged at WARN level and skipped. The items stream is
 * closed explicitly by this method so that it is released even if reading fails part-way.
 *
 * @param context task context supplying the configuration
 * @throws IOException if the index map or the items file cannot be read
 */
@Override
protected void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();
  recommendationsPerUser = conf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
  booleanData = conf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
  indexItemIDMap = TasteHadoopUtils.readIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);

  String itemFilePathString = conf.get(ITEMS_FILE);
  if (itemFilePathString != null) {
    itemsToRecommendFor = new FastIDSet();
    // Fully-qualified type keeps this block independent of the file's import list.
    try (java.io.InputStream in = HadoopUtil.openStream(new Path(itemFilePathString), conf)) {
      for (String line : new FileLineIterable(in)) {
        try {
          itemsToRecommendFor.add(Long.parseLong(line));
        } catch (NumberFormatException nfe) {
          log.warn("itemsFile line ignored: {}", line);
        }
      }
    }
  }
}
/**
 * Reads a file containing one long ID per line into a {@code FastIDSet}.
 *
 * <p>Lines that do not parse as a long are logged at WARN level and skipped.
 *
 * @param pathString location of the ID file, passed to {@code openFile}; may be {@code null}
 * @return the IDs read, or {@code null} when {@code pathString} is {@code null}
 * @throws IOException if the file cannot be opened or read
 */
private FastIDSet readIDList(String pathString) throws IOException {
  if (pathString == null) {
    return null;
  }

  FastIDSet ids = new FastIDSet();
  try (InputStream idStream = openFile(pathString)) {
    for (String idLine : new FileLineIterable(idStream)) {
      try {
        long id = Long.parseLong(idLine);
        ids.add(id);
      } catch (NumberFormatException nfe) {
        log.warn("line ignored: {}", idLine);
      }
    }
  }
  return ids;
}
/**
 * Creates the ID mapping from {@code dataFile}: every line of the file is stored under the long
 * value computed for it by {@code toLongID}.
 *
 * <p>{@code lastModified} is refreshed from the file once all lines have been processed.
 *
 * @return the mapping that was built
 * @throws IOException if the data file cannot be read
 */
private FastByIDMap<String> buildMapping() throws IOException {
  FastByIDMap<String> built = new FastByIDMap<>();
  FileLineIterable fileLines = new FileLineIterable(dataFile);
  for (String entry : fileLines) {
    long entryID = toLongID(entry);
    built.put(entryID, entry);
  }
  // Executed only after the loop finishes normally.
  lastModified = dataFile.lastModified();
  return built;
}
// Matcher for MESSAGE_START, created against an empty input.
// NOTE(review): presumably it is re-targeted at each line inside the loop — the loop body is not visible here; confirm.
Matcher messageBoundaryMatcher = MESSAGE_START.matcher("");
// Iterate over the lines FileLineIterable yields for `current`, using `charset`.
// NOTE(review): the meaning of the trailing `false` argument is not visible in this excerpt — check FileLineIterable.
for (String nextLine : new FileLineIterable(current, charset, false)) {
/**
 * Reads a file containing one long ID per line into a {@code FastIDSet}.
 *
 * <p>Lines that do not parse as a long are logged at WARN level and skipped.
 *
 * @param pathString location of the ID file, passed to {@code openFile}; may be {@code null}
 * @return the IDs read, or {@code null} when {@code pathString} is {@code null}
 * @throws IOException if the file cannot be opened or read
 */
private FastIDSet readIDList(String pathString) throws IOException {
  if (pathString == null) {
    return null;
  }

  FastIDSet ids = new FastIDSet();
  InputStream idStream = openFile(pathString);
  try {
    for (String idLine : new FileLineIterable(idStream)) {
      try {
        long id = Long.parseLong(idLine);
        ids.add(id);
      } catch (NumberFormatException nfe) {
        log.warn("line ignored: {}", idLine);
      }
    }
  } finally {
    // Read-only stream: an exception raised while closing is swallowed.
    Closeables.close(idStream, true);
  }
  return ids;
}
/**
 * Parses a tab-separated file whose lines hold two long IDs followed by a double value.
 *
 * @param file file to read
 * @return map from the pair formed by the first two columns to the value in the third column
 * @throws IOException if the file cannot be read
 */
static Map<Pair<Long,Long>, Double> readSimilarities(File file) throws IOException {
  Map<Pair<Long,Long>, Double> similarities = Maps.newHashMap();
  for (String row : new FileLineIterable(file)) {
    String[] columns = row.split("\t");
    long firstID = Long.parseLong(columns[0]);
    long secondID = Long.parseLong(columns[1]);
    Pair<Long,Long> idPair = new Pair<Long,Long>(firstID, secondID);
    double similarity = Double.parseDouble(columns[2]);
    similarities.put(idPair, similarity);
  }
  return similarities;
}
/**
 * Splits the profile IDs in a gender file into two sets.
 *
 * <p>Each line is read as {@code <profileID>,<gender char>}. Lines whose gender character is
 * {@code 'U'} are skipped; {@code 'M'} goes to the first set and every other character to the
 * second. Both sets are rehashed before being returned.
 *
 * @param genderFile file to read
 * @return a two-element array: index 0 holds the {@code 'M'} IDs, index 1 the remaining IDs
 * @throws IOException if the file cannot be read
 */
public static FastIDSet[] parseMenWomen(File genderFile) throws IOException {
  FastIDSet men = new FastIDSet(50000);
  FastIDSet women = new FastIDSet(50000);
  for (String record : new FileLineIterable(genderFile)) {
    int separatorIndex = record.indexOf(',');
    char genderCode = record.charAt(separatorIndex + 1);
    if (genderCode != 'U') {
      long profileID = Long.parseLong(record.substring(0, separatorIndex));
      FastIDSet target = genderCode == 'M' ? men : women;
      target.add(profileID);
    }
  }
  men.rehash();
  women.rehash();
  return new FastIDSet[] { men, women };
}
/**
 * Parses a recommendations file into a map from user ID to that user's recommended items.
 *
 * <p>Each line is {@code <userID>\t<items>}; square brackets are stripped from the items part,
 * which is then split on commas into {@code <itemID>:<value>} entries.
 *
 * @param file file to read
 * @return map from user ID to the items listed on that user's line, in file order
 * @throws IOException if the file cannot be read
 */
static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws IOException {
  Map<Long,List<RecommendedItem>> recommendations = Maps.newHashMap();
  for (String row : new FileLineIterable(file)) {
    String[] userAndItems = row.split("\t");
    long userID = Long.parseLong(userAndItems[0]);

    String withoutOpening = userAndItems[1].replaceAll("\\[", "");
    String itemList = withoutOpening.replaceAll("\\]", "");

    List<RecommendedItem> items = Lists.newLinkedList();
    for (String entry : itemList.split(",")) {
      String[] idAndValue = entry.split(":");
      long itemID = Long.parseLong(idAndValue[0]);
      float value = Float.parseFloat(idAndValue[1]);
      RecommendedItem item = new GenericRecommendedItem(itemID, value);
      items.add(item);
    }
    recommendations.put(userID, items);
  }
  return recommendations;
}
inputStreamAgain = fs.open(input); fp.generateTopKFrequentPatterns( new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern), fp.generateFList( new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport), minSupport, try { fp.generateTopKFrequentPatterns( new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern), fp.generateFList( new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport), minSupport,