/** Command-line entry point: runs this job through Hadoop's {@link ToolRunner}. */
public static void main(String[] args) throws Exception {
  ParallelALSFactorizationJob job = new ParallelALSFactorizationJob();
  ToolRunner.run(job, args);
}
/**
 * Location of the item-feature matrix M produced by the given iteration.
 * The final iteration is written to the job output directory; all earlier
 * iterations go to the temp directory.
 */
private Path pathToM(int iteration) {
  if (iteration == numIterations - 1) {
    return getOutputPath("M");
  }
  return getTempPath("M-" + iteration);
}
addInputOption(); addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("numThreadsPerSolver", null, "threads per solver mapper", String.valueOf(1)); addOption("usesLongIDs", null, "input contains long IDs that need to be translated"); Map<String,List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; numFeatures = Integer.parseInt(getOption("numFeatures")); numIterations = Integer.parseInt(getOption("numIterations")); lambda = Double.parseDouble(getOption("lambda")); alpha = Double.parseDouble(getOption("alpha")); implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback")); numThreadsPerSolver = Integer.parseInt(getOption("numThreadsPerSolver")); usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs", String.valueOf(false))); Job mapUsers = prepareJob(getInputPath(), getOutputPath("userIDIndex"), TextInputFormat.class, MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); Job mapItems = prepareJob(getInputPath(), getOutputPath("itemIDIndex"), TextInputFormat.class, MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
/**
 * Writes the initial item-feature matrix M: for every item with a nonzero
 * average rating, feature 0 holds that average and the remaining features
 * are filled with random values.
 *
 * @param averageRatings sparse vector of per-item average ratings
 * @throws IOException if the sequence file cannot be written
 */
private void initializeM(Vector averageRatings) throws IOException {
  Random rng = RandomUtils.getRandom();
  FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
  Path firstPartFile = new Path(pathToM(-1), "part-m-00000");
  try (SequenceFile.Writer out =
      new SequenceFile.Writer(fs, getConf(), firstPartFile, IntWritable.class, VectorWritable.class)) {
    IntWritable itemIndex = new IntWritable();
    VectorWritable itemFeatures = new VectorWritable();
    for (Vector.Element rating : averageRatings.nonZeroes()) {
      Vector features = new DenseVector(numFeatures);
      features.setQuick(0, rating.get());
      for (int f = 1; f < numFeatures; f++) {
        features.setQuick(f, rng.nextDouble());
      }
      itemIndex.set(rating.index());
      itemFeatures.set(features);
      out.append(itemIndex, itemFeatures);
    }
  }
}
ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf); double lambda = 0.065; alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations),
ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf);
/** Location of the ratings grouped by user (kept in the job output directory). */
private Path pathToUserRatings() {
  return getOutputPath("userRatings");
}
}
/** Location of the ratings grouped by item (intermediate data, kept in temp). */
private Path pathToItemRatings() {
  return getTempPath("itemRatings");
}
addInputOption(); addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("numThreadsPerSolver", null, "threads per solver mapper", String.valueOf(1)); addOption("usesLongIDs", null, "input contains long IDs that need to be translated"); Map<String,List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; numFeatures = Integer.parseInt(getOption("numFeatures")); numIterations = Integer.parseInt(getOption("numIterations")); lambda = Double.parseDouble(getOption("lambda")); alpha = Double.parseDouble(getOption("alpha")); implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback")); numThreadsPerSolver = Integer.parseInt(getOption("numThreadsPerSolver")); usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs", String.valueOf(false))); Job mapUsers = prepareJob(getInputPath(), getOutputPath("userIDIndex"), TextInputFormat.class, MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); Job mapItems = prepareJob(getInputPath(), getOutputPath("itemIDIndex"), TextInputFormat.class, MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
/**
 * Writes the initial item-feature matrix M: for every item with a nonzero
 * average rating, feature 0 is set to that average and the remaining
 * features are filled with random values.
 *
 * @param averageRatings sparse vector of per-item average ratings
 * @throws IOException if the sequence file cannot be written
 */
private void initializeM(Vector averageRatings) throws IOException {
  Random random = RandomUtils.getRandom();
  FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
  // try-with-resources guarantees the writer is closed on every path,
  // replacing the manual try/finally + Closeables.close idiom and matching
  // the style used elsewhere in this file.
  try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(),
      new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) {
    IntWritable index = new IntWritable();
    VectorWritable featureVector = new VectorWritable();
    for (Vector.Element e : averageRatings.nonZeroes()) {
      Vector row = new DenseVector(numFeatures);
      // first feature carries the item's average rating; the rest start random
      row.setQuick(0, e.get());
      for (int m = 1; m < numFeatures; m++) {
        row.setQuick(m, random.nextDouble());
      }
      index.set(e.index());
      featureVector.set(row);
      writer.append(index, featureVector);
    }
  }
}
ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf); double lambda = 0.065; alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations),
/** Location of the ratings grouped by user (kept in the job output directory). */
private Path pathToUserRatings() {
  return getOutputPath("userRatings");
}
}
/** Location of the ratings grouped by item (intermediate data, kept in temp). */
private Path pathToItemRatings() {
  return getTempPath("itemRatings");
}
addInputOption(); addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("numThreadsPerSolver", null, "threads per solver mapper", String.valueOf(1)); addOption("usesLongIDs", null, "input contains long IDs that need to be translated"); Map<String,List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; numFeatures = Integer.parseInt(getOption("numFeatures")); numIterations = Integer.parseInt(getOption("numIterations")); lambda = Double.parseDouble(getOption("lambda")); alpha = Double.parseDouble(getOption("alpha")); implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback")); numThreadsPerSolver = Integer.parseInt(getOption("numThreadsPerSolver")); boolean usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs", String.valueOf(false))); Job mapUsers = prepareJob(getInputPath(), getOutputPath("userIDIndex"), TextInputFormat.class, MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); Job mapItems = prepareJob(getInputPath(), getOutputPath("itemIDIndex"), TextInputFormat.class, MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
/**
 * Location of the user-feature matrix U produced by the given iteration.
 * The final iteration is written to the job output directory; all earlier
 * iterations go to the temp directory.
 */
private Path pathToU(int iteration) {
  if (iteration == numIterations - 1) {
    return getOutputPath("U");
  }
  return getTempPath("U-" + iteration);
}
/**
 * Writes the initial item-feature matrix M: for every item with a nonzero
 * average rating, feature 0 is set to that average and the remaining
 * features are filled with random values.
 *
 * @param averageRatings sparse vector of per-item average ratings
 * @throws IOException if the sequence file cannot be written
 */
private void initializeM(Vector averageRatings) throws IOException {
  Random random = RandomUtils.getRandom();
  FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
  // try-with-resources guarantees the writer is closed on every path,
  // replacing the manual try/finally + Closeables.close idiom and matching
  // the style used elsewhere in this file.
  try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(),
      new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) {
    IntWritable index = new IntWritable();
    VectorWritable featureVector = new VectorWritable();
    for (Vector.Element e : averageRatings.nonZeroes()) {
      Vector row = new DenseVector(numFeatures);
      // first feature carries the item's average rating; the rest start random
      row.setQuick(0, e.get());
      for (int m = 1; m < numFeatures; m++) {
        row.setQuick(m, random.nextDouble());
      }
      index.set(e.index());
      featureVector.set(row);
      writer.append(index, featureVector);
    }
  }
}
ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob(); alsFactorization.setConf(conf); double alpha = 20; alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda), "--implicitFeedback", String.valueOf(true), "--alpha", String.valueOf(alpha),
/** Command-line entry point: runs this job through Hadoop's {@link ToolRunner}. */
public static void main(String[] args) throws Exception {
  ParallelALSFactorizationJob job = new ParallelALSFactorizationJob();
  ToolRunner.run(job, args);
}
/** Location of the ratings grouped by user (kept in the job output directory). */
private Path pathToUserRatings() {
  return getOutputPath("userRatings");
}
}
/** Location of the ratings grouped by item (intermediate data, kept in temp). */
private Path pathToItemRatings() {
  return getTempPath("itemRatings");
}