/**
 * Runs {@link GiraphHelloWorld} over {@code graphSeed} through
 * {@link InternalVertexRunner#run}. The body makes no assertions, so the
 * test succeeds whenever the run completes without throwing.
 *
 * @throws Exception if the run fails
 */
@Test
public void testNumberOfVertices() throws Exception {
  GiraphConfiguration helloWorldConf = new GiraphConfiguration();

  // Computation under test plus the text formats used to read/write vertices
  helloWorldConf.setComputationClass(GiraphHelloWorld.class);
  helloWorldConf.setVertexInputFormatClass(IntIntNullTextInputFormat.class);
  helloWorldConf.setVertexOutputFormatClass(
      AdjacencyListTextVertexOutputFormat.class);

  // The output lines are collected but currently not inspected
  Iterable<String> outputLines =
      InternalVertexRunner.run(helloWorldConf, graphSeed);
}
}
/**
 * Look up the directory that YourKit snapshots are written to.
 *
 * The directory is resolved through {@code getStringVars} the first time it
 * is requested and then stored in this configuration under a private cache
 * key, so later calls return the stored value.
 *
 * @param context Map context
 * @return output directory
 */
public String getYourKitOutputDir(Mapper.Context context) {
  final String cacheKey = "giraph.yourkit.outputDirCached";

  String cachedDir = get(cacheKey);
  if (cachedDir != null) {
    return cachedDir;
  }

  // Nothing cached yet: resolve it and remember the result
  String resolvedDir = getStringVars(YOURKIT_OUTPUT_DIR,
      YOURKIT_OUTPUT_DIR_DEFAULT, context);
  set(cacheKey, resolvedDir);
  return resolvedDir;
}
/**
 * Fill in the Giraph configuration for the reducers benchmark.
 *
 * @param conf Configuration to populate
 * @param cmd Parsed command line the option values are read from
 */
@Override
protected void prepareConfiguration(GiraphConfiguration conf,
    CommandLine cmd) {
  // Classes that make up the benchmark job
  conf.setComputationClass(ReducersBenchmarkComputation.class);
  conf.setMasterComputeClass(ReducersBenchmarkMasterCompute.class);
  conf.setVertexInputFormatClass(PseudoRandomVertexInputFormat.class);
  conf.setWorkerContextClass(ReducersBenchmarkWorkerContext.class);

  // Vertex count comes from the command line; edges per vertex is fixed at 1
  final long totalVertices = BenchmarkOption.VERTICES.getOptionLongValue(cmd);
  conf.setLong(PseudoRandomInputFormatConstants.AGGREGATE_VERTICES,
      totalVertices);
  conf.setLong(PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, 1);

  final int reducerCount = REDUCERS.getOptionIntValue(cmd);
  conf.setInt(REDUCERS_NUM, reducerCount);

  // Copy the configured max worker count (-1 when unset) under "workers"
  final int maxWorkers = conf.getInt(GiraphConstants.MAX_WORKERS, -1);
  conf.setInt("workers", maxWorkers);
}
final String computationClassName, final int workers) throws ClassNotFoundException, IOException { conf.setWorkerConfiguration(workers, workers, 100.0f); if (cmd.hasOption("typesHolder")) { Class<? extends TypesHolder> typesHolderClass = conf.setMessageCombinerClass( (Class<? extends MessageCombiner>) Class.forName(cmd.getOptionValue("c"))); conf.setVertexValueCombinerClass( (Class<? extends VertexValueCombiner>) Class.forName(cmd.getOptionValue("vc"))); conf.setOutEdgesClass( (Class<? extends OutEdges>) Class.forName(cmd.getOptionValue("ve"))); conf.setInputOutEdgesClass( (Class<? extends OutEdges>) Class.forName(cmd.getOptionValue("ive"))); conf.setWorkerContextClass( (Class<? extends WorkerContext>) Class .forName(cmd.getOptionValue("wc"))); conf.setMasterComputeClass( (Class<? extends MasterCompute>) Class .forName(cmd.getOptionValue("mc"))); conf.setAggregatorWriterClass(
bspJob.getConfiguration().setComputationClass( SimpleCheckpointComputation.class); bspJob.getConfiguration().setVertexInputFormatClass( GeneratedVertexInputFormat.class); bspJob.getConfiguration().setVertexOutputFormatClass( IdWithValueTextOutputFormat.class); bspJob.getConfiguration().setWorkerContextClass( SimpleCheckpointVertexWorkerContext.class); bspJob.getConfiguration().setMasterComputeClass( SimpleCheckpointVertexMasterCompute.class); int minWorkers = Integer.parseInt(cmd.getOptionValue('w')); int maxWorkers = Integer.parseInt(cmd.getOptionValue('w')); bspJob.getConfiguration().setWorkerConfiguration( minWorkers, maxWorkers, 100.0f);
configuration.setComputationClass(WeightedPageRankComputation.class); int edgesClassOption = EDGES_CLASS.getOptionIntValue(cmd, 1); switch (edgesClassOption) { case 0: configuration.setOutEdgesClass(LongDoubleArrayEdges.class); break; case 1: configuration.setOutEdgesClass(ByteArrayEdges.class); break; case 2: configuration.setOutEdgesClass(ByteArrayEdges.class); configuration.useUnsafeSerialization(true); break; case 3: configuration.setOutEdgesClass(ArrayListEdges.class); break; case 4: configuration.setOutEdgesClass(HashMapEdges.class); break; default: LOG.info("Unknown OutEdges class, " + "defaulting to LongDoubleArrayEdges"); configuration.setOutEdgesClass(LongDoubleArrayEdges.class); GiraphConstants.VERTEX_EDGES_CLASS.get(configuration)); if (MESSAGE_COMBINER_TYPE.getOptionIntValue(cmd, 1) == 1) { configuration.setMessageCombinerClass(DoubleSumMessageCombiner.class); configuration.setEdgeInputFormatClass(PseudoRandomEdgeInputFormat.class);
private void addPrivateConfiguration(GiraphConfiguration configuration) { // Set currently fixed classes. TODO: Allow configuration at some point perhaps configuration.setComputationClass(io.arabesque.computation.ExecutionEngine.class); configuration.setMasterComputeClass(io.arabesque.computation.MasterExecutionEngine.class); configuration.setVertexInputFormatClass(ArabesqueInputFormat.class); // Calculate partition count based on # of workers and # of threads if no count was provided if (GiraphConstants.USER_PARTITION_COUNT.get(configuration) == -1) { int numWorkers = configuration.getMinWorkers(); int numComputeThreads = configuration.getNumComputeThreads(); GiraphConstants.USER_PARTITION_COUNT.set(configuration, numWorkers * numComputeThreads); } // Set our default worker context class instead of Giraph's default if (GiraphConstants.WORKER_CONTEXT_CLASS.get(configuration).equals(DefaultWorkerContext.class)) { configuration.setWorkerContextClass(WorkerContext.class); } }
/**
 * Configure and run the job.
 *
 * @param args Exactly two values: the output path and the number of workers
 * @return 0 if the job reports success, -1 otherwise
 * @throws IllegalArgumentException if {@code args} does not have 2 entries
 * @throws Exception on any failure while setting up or running the job
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    throw new IllegalArgumentException(
        "run: Must have 2 arguments <output path> <# of workers>");
  }
  final String outputPath = args[0];

  GiraphJob job = new GiraphJob(getConf(), getClass().getName());
  job.getConfiguration().setComputationClass(SimpleComputation.class);
  job.getConfiguration().setVertexInputFormatClass(
      SimpleSuperstepVertexInputFormat.class);
  job.getConfiguration().setWorkerContextClass(EmitterWorkerContext.class);
  job.getConfiguration().set(
      SimpleVertexWithWorkerContext.OUTPUTDIR, outputPath);

  // The same value is used for both worker-count arguments
  final int workerCount = Integer.parseInt(args[1]);
  job.getConfiguration().setWorkerConfiguration(
      workerCount, workerCount, 100.0f);

  return job.run(true) ? 0 : -1;
}
File tmpDir) throws Exception { String ns = conf.get(HBaseGraphConfiguration.Keys.GRAPH_NAMESPACE); String prefix = conf.get(HBaseGraphConfiguration.Keys.GRAPH_TABLE_PREFIX); String tablePrefix = (ns != null ? ns + TableName.NAMESPACE_DELIM : "") + (prefix != null ? prefix : ""); conf.set(Constants.EDGE_INPUT_TABLE, tablePrefix + Constants.EDGES); conf.set(Constants.VERTEX_INPUT_TABLE, tablePrefix + Constants.VERTICES); File zkMgrDir = FileUtils.createTempDir(tmpDir, "_defaultZkManagerDir"); conf.setWorkerConfiguration(1, 1, 100.0f); GiraphConstants.SPLIT_MASTER_WORKER.set(conf, false); GiraphConstants.LOCAL_TEST_MODE.set(conf, true); conf.set(GiraphConstants.ZOOKEEPER_DIR, zkDir.toString()); GiraphConstants.ZOOKEEPER_MANAGER_DIRECTORY.set(conf, zkMgrDir.toString()); GiraphJob job = new GiraphJob(conf, conf.getComputationName()); if (conf.hasVertexOutputFormat() && outFile.canRead()) { return Files.readLines(outFile, Charsets.UTF_8); } else {
JythonUtils.init(conf, "PageRank"); } else { conf.setComputationClass(PageRankComputation.class); conf.setOutEdgesClass(IntNullArrayEdges.class); conf.setMessageCombinerClass(FloatSumMessageCombiner.class); conf.setVertexInputFormatClass( PseudoRandomIntNullVertexInputFormat.class); conf.setInt(PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, BenchmarkOption.VERTICES.getOptionIntValue(cmd)); conf.setInt(PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, BenchmarkOption.EDGES_PER_VERTEX.getOptionIntValue(cmd)); conf.setInt(PageRankComputation.SUPERSTEP_COUNT, BenchmarkOption.SUPERSTEPS.getOptionIntValue(cmd)); conf.setFloat(PseudoRandomInputFormatConstants.LOCAL_EDGES_MIN_RATIO, BenchmarkOption.LOCAL_EDGES_MIN_RATIO.getOptionFloatValue(cmd, PseudoRandomInputFormatConstants.LOCAL_EDGES_MIN_RATIO_DEFAULT));
/**
 * Fill in the Giraph configuration for the shortest paths benchmark.
 *
 * @param conf Configuration to populate
 * @param cmd Parsed command line the option values are read from
 */
@Override
protected void prepareConfiguration(GiraphConfiguration conf,
    CommandLine cmd) {
  conf.setComputationClass(ShortestPathsComputation.class);

  // Edge storage: option value 1 (also the fallback) selects ArrayListEdges,
  // anything else selects HashMapEdges
  final boolean useArrayListEdges =
      EDGES_CLASS.getOptionIntValue(cmd, 1) == 1;
  if (useArrayListEdges) {
    conf.setOutEdgesClass(ArrayListEdges.class);
  } else {
    conf.setOutEdgesClass(HashMapEdges.class);
  }
  LOG.info("Using class " + GiraphConstants.COMPUTATION_CLASS.get(conf));

  conf.setVertexInputFormatClass(PseudoRandomVertexInputFormat.class);

  // The combiner is installed unless it was switched off on the command line
  final boolean combinerDisabled = NO_COMBINER.optionTurnedOn(cmd);
  if (!combinerDisabled) {
    conf.setMessageCombinerClass(MinimumDoubleMessageCombiner.class);
  }

  final long vertexCount = BenchmarkOption.VERTICES.getOptionLongValue(cmd);
  conf.setLong(PseudoRandomInputFormatConstants.AGGREGATE_VERTICES,
      vertexCount);

  final long edgesPerVertex =
      BenchmarkOption.EDGES_PER_VERTEX.getOptionLongValue(cmd);
  conf.setLong(PseudoRandomInputFormatConstants.EDGES_PER_VERTEX,
      edgesPerVertex);
}
File vertexInputFile = null; File edgeInputFile = null; if (conf.hasVertexInputFormat()) { vertexInputFile = FileUtils.createTempFile(tmpDir, "vertices.txt"); if (conf.hasEdgeInputFormat()) { edgeInputFile = FileUtils.createTempFile(tmpDir, "edges.txt"); if (conf.hasVertexInputFormat()) { FileUtils.writeLines(vertexInputFile, vertexInputData); if (conf.hasEdgeInputFormat()) { FileUtils.writeLines(edgeInputFile, edgeInputData); conf.setWorkerConfiguration(1, 1, 100.0f); GiraphConstants.SPLIT_MASTER_WORKER.set(conf, false); GiraphConstants.LOCAL_TEST_MODE.set(conf, true); conf.set(GiraphConstants.ZOOKEEPER_DIR, zkDir.toString()); GiraphConstants.ZOOKEEPER_MANAGER_DIRECTORY.set(conf, zkMgrDir.toString()); GiraphJob job = new GiraphJob(conf, conf.getComputationName()); if (conf.hasVertexInputFormat()) { GiraphFileInputFormat.setVertexInputPath(internalJob.getConfiguration(), new Path(vertexInputFile.toString())); if (conf.hasEdgeInputFormat()) {
conf.setVertexInputFormatClass(InMemoryVertexInputFormat.class); GiraphJob job = new GiraphJob(conf, conf.getComputationName()); conf.setWorkerConfiguration(1, 1, 100.0f); GiraphConstants.SPLIT_MASTER_WORKER.set(conf, false); GiraphConstants.LOCAL_TEST_MODE.set(conf, true); GiraphConstants.ZOOKEEPER_SERVER_PORT.set(conf, 0); conf.set(GiraphConstants.ZOOKEEPER_DIR, zkDir.toString()); GiraphConstants.ZOOKEEPER_MANAGER_DIRECTORY.set(conf, zkMgrDir.toString());
conf.setMasterComputeClass(BlockMasterCompute.class); conf.setComputationClass(BlockComputation.class); conf.setWorkerContextClass(BlockWorkerContext.class);
/**
 * Constructor.
 *
 * Wraps the supplied configuration in a new {@link GiraphConfiguration} and
 * delegates to the constructor that takes a {@link GiraphConfiguration}.
 *
 * @param configuration User-defined configuration
 * @param jobName User-defined job name
 * @throws IOException declared for the delegated construction; the exact
 *         failure conditions are defined by the constructor delegated to
 */
public GiraphJob(Configuration configuration,
                 String jobName) throws IOException {
  this(new GiraphConfiguration(configuration), jobName);
}
final GiraphConfiguration giraphConf = new GiraphConfiguration(getConf()); final int zkPort = ZOOKEEPER_SERVER_PORT.get(giraphConf); final String zkBasePath = giraphConf.get( GiraphConstants.BASE_ZNODE_KEY, "") + BspService.BASE_DIR; final String[] zkServerList; String zkServerListStr = giraphConf.getZookeeperList(); if (zkServerListStr.isEmpty()) { throw new IllegalStateException("GiraphZooKeeperAdmin requires a list " +
@Override public int run(String[] args) throws Exception { if (null == getConf()) { conf = new Configuration(); } GiraphConfiguration giraphConf = new GiraphConfiguration(getConf()); YamlConfiguration yamlConfig = new YamlConfiguration(args); yamlConfig.load(); yamlConfig.populateGiraphConfiguration(giraphConf); // set up job for various platforms final String arabesqueComputationName = giraphConf.get(io.arabesque.conf.Configuration.CONF_COMPUTATION_CLASS); final String jobName = "Arabesque: " + arabesqueComputationName; // run the job, collect results if (LOG.isDebugEnabled()) { LOG.debug("Attempting to run computation: " + arabesqueComputationName); } GiraphJob job = getJob(giraphConf, jobName); boolean verbose = yamlConfig.getBoolean("verbose"); return job.run(verbose) ? 0 : -1; }
/**
 * Check environment for Hadoop security token location in case we are
 * executing the Giraph job on a MRv1 Hadoop cluster.
 *
 * When the {@code HADOOP_TOKEN_FILE_LOCATION} environment variable is set,
 * its value is stored under {@code mapreduce.job.credentials.binary};
 * otherwise nothing is changed.
 */
public void configureHadoopSecurity() {
  final String tokenFileLocation =
      System.getenv("HADOOP_TOKEN_FILE_LOCATION");
  if (tokenFileLocation == null) {
    // No token file advertised in the environment: nothing to configure
    return;
  }
  set("mapreduce.job.credentials.binary", tokenFileLocation);
}
/**
 * Helper to deal with computation class.
 *
 * A name ending in {@code ".py"} is treated as a Jython script and handed to
 * {@code handleJythonComputation}; any other name is loaded with
 * {@link Class#forName(String)} and set as the computation class.
 *
 * @param conf Configuration
 * @param cmd CommandLine
 * @param computationClassName Name of computation
 * @throws ClassNotFoundException error finding class
 */
private static void handleComputationClass(GiraphConfiguration conf,
    CommandLine cmd, String computationClassName)
  throws ClassNotFoundException {
  // Match on the full ".py" extension: a bare "py" suffix would also catch
  // ordinary Java class names such as "org.example.GraphCopy" and wrongly
  // route them to the Jython handler.
  if (computationClassName.endsWith(".py")) {
    handleJythonComputation(conf, cmd, computationClassName);
  } else {
    conf.setComputationClass(
        (Class<? extends Computation>) Class.forName(computationClassName));
  }
}
giraphConf.addWorkerObserverClass(LogVersions.class); giraphConf.addMasterObserverClass(LogVersions.class); giraphConf.setWorkerConfiguration(workers, workers, 100.0f); prepareConfiguration(giraphConf, cmd);