/**
 * Command-line entry point: hands a new {@link Configuration}, a new
 * {@code TestNaiveBayesDriver} and the raw arguments to {@link ToolRunner#run}.
 *
 * @param args command-line arguments, forwarded unchanged
 * @throws Exception anything propagated by {@code ToolRunner.run}
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  TestNaiveBayesDriver driver = new TestNaiveBayesDriver();
  // The status code returned by ToolRunner.run is discarded.
  ToolRunner.run(conf, driver, args);
}
private boolean runMapReduce() throws IOException, InterruptedException, ClassNotFoundException { Path model = new Path(getOption("model")); HadoopUtil.cacheFiles(model, getConf()); //the output key is the expected value, the output value are the scores for all the labels Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class); //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels")); boolean complementary = hasOption("testComplementary"); testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary)); return testJob.waitForCompletion(true); }
/**
 * Driver entry point: registers the input, output and overwrite options plus "model",
 * "testComplementary", "runSequential" and "labelIndex", parses {@code args}, deletes the
 * output path when the overwrite option is present, runs the test through either
 * {@code runSequential()} or {@code runMapReduce()}, then reads the label index and hands
 * the results to {@code analyzeResults}.
 *
 * NOTE(review): this fragment is incomplete as shown. Several opening braces are never
 * closed, no final return statement is visible, and {@code dirIterable} is used without a
 * visible declaration (the {@code SequenceFileDirIterable} expression is not assigned to
 * anything). Restore the missing statements before relying on this method.
 * NOTE(review): {@code addOption(...)} is nested inside another {@code addOption(...)} for
 * the overwrite option; confirm that is intended.
 *
 * @param args command-line arguments handed to {@code parseArguments}
 * @return -1 when argument parsing yields null or {@code runMapReduce()} returns false;
 *         other return values are not visible in this fragment
 * @throws Exception propagated from the calls made here
 */
@Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(addOption(DefaultOptionCreator.overwriteOption().create())); addOption("model", "m", "The path to the model built during training", true); addOption(buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false))); addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false))); addOption("labelIndex", "l", "The path to the location of the label index", true); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), getOutputPath()); boolean sequential = hasOption("runSequential"); boolean succeeded; if (sequential) { runSequential(); } else { succeeded = runMapReduce(); if (!succeeded) { return -1; Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); new SequenceFileDirIterable<Text, VectorWritable>(getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer);
/**
 * Driver entry point: registers the input, output and overwrite options plus "model",
 * "testComplementary", "runSequential" and "labelIndex", parses {@code args}, deletes the
 * output path when the overwrite option is present, runs the test through either
 * {@code runSequential()} or {@code runMapReduce()}, then reads the label index from the
 * "labelIndex" path and calls {@code analyzeResults} with a {@code ResultAnalyzer} built
 * from the label values and the string "DEFAULT".
 *
 * NOTE(review): this fragment is incomplete as shown. Opening braces are left unclosed,
 * there is no visible final return, and {@code dirIterable} is referenced although the
 * {@code new SequenceFileDirIterable<>(...)} expression is not bound to a variable. The
 * missing statements need to be restored.
 * NOTE(review): the overwrite option is passed through two nested {@code addOption(...)}
 * calls; verify this is deliberate.
 *
 * @param args command-line arguments handed to {@code parseArguments}
 * @return -1 when argument parsing yields null or {@code runMapReduce()} returns false;
 *         other return values are not visible in this fragment
 * @throws Exception propagated from the calls made here
 */
@Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(addOption(DefaultOptionCreator.overwriteOption().create())); addOption("model", "m", "The path to the model built during training", true); addOption(buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false))); addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false))); addOption("labelIndex", "l", "The path to the location of the label index", true); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), getOutputPath()); boolean sequential = hasOption("runSequential"); boolean succeeded; if (sequential) { runSequential(); } else { succeeded = runMapReduce(); if (!succeeded) { return -1; Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); new SequenceFileDirIterable<>(getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer);
private boolean runMapReduce() throws IOException, InterruptedException, ClassNotFoundException { Path model = new Path(getOption("model")); HadoopUtil.cacheFiles(model, getConf()); //the output key is the expected value, the output value are the scores for all the labels Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class); //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels")); boolean complementary = hasOption("testComplementary"); testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary)); return testJob.waitForCompletion(true); }
/**
 * Program entry point. Delegates to {@link ToolRunner#run} with a freshly created
 * {@link Configuration} and a new {@code TestNaiveBayesDriver}.
 *
 * @param args command-line arguments, forwarded unchanged
 * @throws Exception anything propagated by {@code ToolRunner.run}
 */
public static void main(String[] args) throws Exception {
  Configuration configuration = new Configuration();
  TestNaiveBayesDriver tool = new TestNaiveBayesDriver();
  // Return value of ToolRunner.run is not used.
  ToolRunner.run(configuration, tool, args);
}
/**
 * Driver entry point with the sequential path written inline: registers the input, output
 * and overwrite options plus "model", "testComplementary", "runSequential" and "labelIndex",
 * parses {@code args}, deletes the output path when the overwrite option is present, and
 * reads the "testComplementary" and "runSequential" flags. In the sequential branch it
 * obtains a {@code FileSystem}, materializes a {@code NaiveBayesModel} from the "model"
 * path, constructs a {@code SequenceFile.Writer} on the output path and a {@code Reader} on
 * the input path, and closes the reader.
 *
 * NOTE(review): this fragment is incomplete as shown. {@code classifier} is declared but
 * never assigned in the visible code, the {@code SequenceFile.Writer} is not bound to a
 * variable (so it cannot be written to or closed here), no loop reading from {@code reader}
 * is visible, braces are left unclosed, and no non-sequential branch or final return is
 * present. Restore the missing statements before relying on this method.
 * NOTE(review): {@code addOption(...)} is nested inside another {@code addOption(...)} for
 * the overwrite option; confirm that is intended.
 *
 * @param args command-line arguments handed to {@code parseArguments}
 * @return -1 when argument parsing yields null; other return values are not visible in
 *         this fragment
 * @throws Exception propagated from the calls made here
 */
@Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(addOption(DefaultOptionCreator.overwriteOption().create())); addOption("model", "m", "The path to the model built during training", true); addOption(buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false))); addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false))); addOption("labelIndex", "l", "The path to the location of the label index", true); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), getOutputPath()); boolean complementary = hasOption("testComplementary"); boolean sequential = hasOption("runSequential"); if (sequential) { FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); AbstractNaiveBayesClassifier classifier; if (complementary) { new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class, VectorWritable.class); Reader reader = new Reader(fs, getInputPath(), getConf()); Text key = new Text(); VectorWritable vw = new VectorWritable(); reader.close();
private boolean runMapReduce(Map<String, List<String>> parsedArgs) throws IOException, InterruptedException, ClassNotFoundException { Path model = new Path(getOption("model")); HadoopUtil.cacheFiles(model, getConf()); //the output key is the expected value, the output value are the scores for all the labels Job testJob = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, BayesTestMapper.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class); //testJob.getConfiguration().set(LABEL_KEY, getOption("--labels")); //boolean complementary = parsedArgs.containsKey("testComplementary"); //always result to false as key in hash map is "--testComplementary" boolean complementary = hasOption("testComplementary"); //or complementary = parsedArgs.containsKey("--testComplementary"); testJob.getConfiguration().set(COMPLEMENTARY, String.valueOf(complementary)); return testJob.waitForCompletion(true); }
/**
 * Launches the driver from the command line by passing a new {@link Configuration}, a new
 * {@code TestNaiveBayesDriver} instance and {@code args} to {@link ToolRunner#run}.
 *
 * @param args command-line arguments, forwarded unchanged
 * @throws Exception anything propagated by {@code ToolRunner.run}
 */
public static void main(String[] args) throws Exception {
  Configuration hadoopConf = new Configuration();
  TestNaiveBayesDriver job = new TestNaiveBayesDriver();
  // The int returned by ToolRunner.run is ignored.
  ToolRunner.run(hadoopConf, job, args);
}
/**
 * Sequential (non-MapReduce) variant of the test: reads the "testComplementary" flag,
 * obtains a {@code FileSystem} from the configuration, materializes a
 * {@code NaiveBayesModel} from the "model" path, creates a sequence-file writer at
 * "part-r-00000" under the output path with {@code Text} keys and {@code VectorWritable}
 * values, and builds a {@code SequenceFileDirIterable} over the part files of the input path.
 *
 * NOTE(review): this fragment is incomplete as shown. The {@code SequenceFile.createWriter}
 * call is followed by a stray {@code )) {} — presumably the remainder of a
 * try-with-resources header whose start is missing — and the writer is not bound to a
 * variable. No classifier is created, {@code complementary} is never used, nothing iterates
 * {@code dirIterable}, and the method body is never closed. Restore the missing statements
 * before relying on this method.
 *
 * @throws IOException propagated from the calls made here
 */
private void runSequential() throws IOException { boolean complementary = hasOption("testComplementary"); FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"), Text.class, VectorWritable.class)) { SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
/**
 * Sequential (non-MapReduce) variant of the test: reads the "testComplementary" flag,
 * obtains a {@code FileSystem} from the configuration, materializes a
 * {@code NaiveBayesModel} from the "model" path, assigns a
 * {@code StandardNaiveBayesClassifier} built from that model to {@code classifier}, opens a
 * {@code SequenceFile.Writer} at "part-r-00000" under the output path, and constructs a
 * {@code SequenceFileDirIterable} over the part files of the input path.
 *
 * NOTE(review): this fragment is incomplete as shown. {@code classifier} is assigned without
 * a visible declaration, {@code complementary} is read but never used (no complementary
 * classifier branch is visible), the {@code SequenceFileDirIterable} expression is not bound
 * to a variable, nothing is written to or closes {@code writer}, and the method body is
 * never closed. Restore the missing statements before relying on this method.
 *
 * @throws IOException propagated from the calls made here
 */
private void runSequential() throws IOException { boolean complementary = hasOption("testComplementary"); FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); classifier = new StandardNaiveBayesClassifier(model); SequenceFile.Writer writer = SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"), Text.class, VectorWritable.class); new SequenceFileDirIterable<Text, VectorWritable>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf());