/**
 * Return the first cached file in the list, or null if there are no cached files.
 * @param conf - MapReduce Configuration
 * @return Path of the first cached file, or null if none exists
 * @throws IOException if the cached files cannot be determined
 */
public static Path getSingleCachedFile(Configuration conf) throws IOException {
  Path[] files = getCachedFiles(conf);
  return files.length > 0 ? files[0] : null;
}
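A minimal usage sketch for the helper above: a mapper resolves its single side file once in setup(), assuming the helper lives on org.apache.mahout.common.HadoopUtil as the other snippets suggest. The SideDataMapper class, its key/value types, and the error message are illustrative assumptions, not Mahout code.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.common.HadoopUtil;

public class SideDataMapper extends Mapper<LongWritable, Text, Text, Text> {

  private Path sideDataPath;

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    // Expect a single file registered in the DistributedCache and remember its path.
    sideDataPath = HadoopUtil.getSingleCachedFile(conf);
    if (sideDataPath == null) {
      throw new IOException("no side file registered in the DistributedCache");
    }
  }
}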
/**
 * Helper method. Get a path from the DistributedCache.
 *
 * @param conf
 *          configuration
 * @param index
 *          index of the path in the DistributedCache files
 * @return path from the DistributedCache
 * @throws IOException
 *           if no path is found
 */
public static Path getDistributedCacheFile(Configuration conf, int index) throws IOException {
  Path[] files = HadoopUtil.getCachedFiles(conf);
  if (files.length <= index) {
    throw new IOException("path not found in the DistributedCache");
  }
  return files[index];
}
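Index-based lookup only works if the driver registers the cache files in a known order. A hedged driver-side sketch using the classic DistributedCache.setCacheFiles call; the class name and paths are illustrative assumptions.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public final class CacheSetupExample {

  private CacheSetupExample() {}

  public static void registerSideFiles(Configuration conf) {
    // The fixed order of this array is what makes getDistributedCacheFile(conf, index) meaningful.
    URI[] cacheFiles = {
        new Path("/models/dataset.info").toUri(),   // index 0
        new Path("/models/forest.seq").toUri()      // index 1
    };
    DistributedCache.setCacheFiles(cacheFiles, conf);
  }
}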
/**
 * Loads the vector from the {@link DistributedCache}.
 * @throws IOException if the DistributedCache does not contain exactly one file
 */
public static Vector load(Configuration conf) throws IOException {
  Path[] files = HadoopUtil.getCachedFiles(conf);
  if (files.length != 1) {
    throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
  }
  if (log.isInfoEnabled()) {
    log.info("Files are: {}", Arrays.toString(files));
  }
  return load(conf, files[0]);
}
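When the vector has already been written to a known location (for example in a test), the DistributedCache lookup can be skipped by calling the two-argument overload that the method above delegates to. A sketch under the assumption that it sits in the same class as load(Configuration, Path); the path is purely illustrative.

// Hypothetical helper, assumed to live in the same class as load(Configuration, Path).
public static Vector loadFromExplicitPath(Configuration conf) throws IOException {
  // Illustrative location of a previously written frequency vector.
  Path frequencyFile = new Path("/output/frequency.file-0");
  return load(conf, frequencyFile);
}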
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  int k = Integer.parseInt(conf.get(QRFirstStep.PROP_K));
  int p = Integer.parseInt(conf.get(QRFirstStep.PROP_P));
  kp = k + p;

  outKey = new SplitPartitionedWritable(context);

  blockHeight = conf.getInt(BtJob.PROP_OUTER_PROD_BLOCK_HEIGHT, -1);
  distributedBt = conf.get(PROP_BT_BROADCAST) != null;
  if (distributedBt) {
    btLocalPath = HadoopUtil.getCachedFiles(conf);
    localFsConfig = new Configuration();
    localFsConfig.set("fs.default.name", "file:///");
  }

  /*
   * PCA-related corrections (MAHOUT-817)
   */
  String xiPathStr = conf.get(PROP_XI_PATH);
  if (xiPathStr != null) {
    xi = SSVDHelper.loadAndSumUpVectors(new Path(xiPathStr), conf);
    sq = SSVDHelper.loadAndSumUpVectors(new Path(conf.get(PROP_SQ_PATH)), conf);
  }
}
}
/**
 * Reads the feature frequency list (fList) back from its serialized form in the DistributedCache.
 *
 * @return Deserialized Feature Frequency List
 */
public static List<Pair<String,Long>> readFList(Configuration conf) throws IOException {
  List<Pair<String,Long>> list = Lists.newArrayList();

  Path[] files = HadoopUtil.getCachedFiles(conf);
  if (files.length != 1) {
    throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
  }

  for (Pair<Text,LongWritable> record
       : new SequenceFileIterable<Text,LongWritable>(files[0], true, conf)) {
    list.add(new Pair<String,Long>(record.getFirst().toString(), record.getSecond().get()));
  }
  return list;
}
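A short consumer sketch: callers typically turn the deserialized fList into a feature-to-rank map before processing transactions. The toRankMap helper and the java.util.Map/HashMap imports it needs are illustrative assumptions placed alongside readFList, not Mahout API.

// Hypothetical helper, assumed to sit next to readFList above.
public static Map<String,Integer> toRankMap(Configuration conf) throws IOException {
  Map<String,Integer> fMap = new HashMap<String,Integer>();
  int rank = 0;
  // Assign ranks in the order the features were serialized.
  for (Pair<String,Long> entry : readFList(conf)) {
    fMap.put(entry.getFirst(), rank++);
  }
  return fMap;
}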
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration conf = context.getConfiguration();

  Path[] files = HadoopUtil.getCachedFiles(conf);
  if (files.length < 2) {
    throw new IOException("not enough paths in the DistributedCache");
  }

  dataset = Dataset.load(conf, files[0]);
  converter = new DataConverter(dataset);

  forest = DecisionForest.load(conf, files[1]);
  if (forest == null) {
    throw new InterruptedException("DecisionForest not found!");
  }
}
Path[] btFiles = HadoopUtil.getCachedFiles(context.getConfiguration());
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
    Configuration conf) throws IOException {

  IntWritable rowIndex = new IntWritable();
  VectorWritable row = new VectorWritable();

  OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0
      ? new OpenIntObjectHashMap<Vector>(numEntities)
      : new OpenIntObjectHashMap<Vector>();

  Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
  LocalFileSystem localFs = FileSystem.getLocal(conf);

  for (Path cachedFile : cachedFiles) {
    SequenceFile.Reader reader = null;
    try {
      reader = new SequenceFile.Reader(localFs, cachedFile, conf);
      while (reader.next(rowIndex, row)) {
        featureMatrix.put(rowIndex.get(), row.get());
      }
    } finally {
      Closeables.close(reader, true);
    }
  }

  Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty");
  return featureMatrix;
}
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
    Configuration conf) throws IOException {

  IntWritable rowIndex = new IntWritable();
  VectorWritable row = new VectorWritable();

  OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0
      ? new OpenIntObjectHashMap<Vector>(numEntities)
      : new OpenIntObjectHashMap<Vector>();

  Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
  LocalFileSystem localFs = FileSystem.getLocal(conf);

  for (Path cachedFile : cachedFiles) {
    try (SequenceFile.Reader reader =
             new SequenceFile.Reader(localFs.getConf(), SequenceFile.Reader.file(cachedFile))) {
      while (reader.next(rowIndex, row)) {
        featureMatrix.put(rowIndex.get(), row.get());
      }
    }
  }

  Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty");
  return featureMatrix;
}
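A hedged sketch of the consuming side: a mapper loads the broadcast feature matrix once per task in setup(). The field name and the "numEntities" property are illustrative assumptions, and the helper above is assumed to be callable from the same class.

private OpenIntObjectHashMap<Vector> features;

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  // Illustrative property name; a non-positive value makes the helper use a default-sized map.
  int numEntities = conf.getInt("numEntities", -1);
  // Pulls every row of the cached matrix into memory for map-side joins.
  features = readMatrixByRowsFromDistributedCache(numEntities, conf);
}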