/**
 * Persists the feature-vector RDD as gzip-compressed JSON text, one
 * {@code [id, vector]} pair per output line.
 *
 * @param features   pairs of (feature index, feature vector) to save
 * @param path       output directory for the text files
 * @param bIndexToID broadcast map translating a feature index to its string ID
 */
private static void saveFeaturesRDD(JavaPairRDD<Integer,float[]> features, Path path, Broadcast<? extends Map<Integer,String>> bIndexToID) {
  log.info("Saving features RDD to {}", path);
  JavaRDD<String> jsonLines = features.map(indexAndVector -> {
    // Translate the numeric index back to its external string ID via the broadcast map.
    Map<Integer,String> indexToID = bIndexToID.value();
    String id = indexToID.get(indexAndVector._1());
    float[] featureVector = indexAndVector._2();
    return TextUtils.joinJSON(Arrays.asList(id, featureVector));
  });
  jsonLines.saveAsTextFile(path.toString(), GzipCodec.class);
}
/**
 * Loads whole CSV files, splits them into parsed lines, keeps only rows whose
 * first column equals the given key, and writes the surviving rows as text.
 *
 * <p>Usage: {@code BasicLoadCsv sparkMaster csvInputFile csvOutputFile key}
 *
 * @throws Exception if the wrong number of arguments is supplied
 */
public static void main(String[] args) throws Exception {
  // BUG FIX: the original tested args.length != 3 but then read args[3],
  // which would throw ArrayIndexOutOfBoundsException — four arguments are required.
  if (args.length != 4) {
    throw new Exception("Usage BasicLoadCsv sparkMaster csvInputFile csvOutputFile key");
  }
  String master = args[0];
  String csvInput = args[1];
  String outputFile = args[2];
  final String key = args[3];
  JavaSparkContext sc = new JavaSparkContext(
      master, "loadwholecsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // wholeTextFiles yields (fileName, fileContents); ParseLine flattens each file
  // into its parsed CSV rows.
  JavaPairRDD<String, String> csvData = sc.wholeTextFiles(csvInput);
  JavaRDD<String[]> keyedRDD = csvData.flatMap(new ParseLine());
  // Keep only rows whose first column matches the requested key.
  JavaRDD<String[]> result = keyedRDD.filter(new Function<String[], Boolean>() {
    public Boolean call(String[] input) {
      return input[0].equals(key);
    }
  });
  // NOTE(review): saving a JavaRDD<String[]> writes String[].toString() (e.g.
  // "[Ljava.lang.String;@..."), not the row contents — confirm intended output format.
  result.saveAsTextFile(outputFile);
}
}
/**
 * Reads JSON records, keeps only people who like pandas, and writes the
 * survivors back out as JSON text.
 *
 * <p>Usage: {@code BasicLoadJson [sparkMaster] [jsoninput] [jsonoutput]}
 *
 * @throws Exception if the wrong number of arguments is supplied
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    throw new Exception("Usage BasicLoadJson [sparkMaster] [jsoninput] [jsonoutput]");
  }
  String master = args[0];
  String inputPath = args[1];
  String outputPath = args[2];
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicloadjson", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // Parse per-partition to amortize parser setup, then filter and re-serialize.
  JavaRDD<String> rawLines = sc.textFile(inputPath);
  JavaRDD<Person> pandaFans = rawLines
      .mapPartitions(new ParseJson())
      .filter(new LikesPandas());
  JavaRDD<String> serialized = pandaFans.mapPartitions(new WriteJson());
  serialized.saveAsTextFile(outputPath);
}
}
@Test public void textFiles() throws IOException { String outputDir = new File(tempDir, "output").getAbsolutePath(); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); rdd.saveAsTextFile(outputDir); // Read the plain text file and check it's OK File outputFile = new File(outputDir, "part-00000"); String content = Files.toString(outputFile, StandardCharsets.UTF_8); assertEquals("1\n2\n3\n4\n", content); // Also try reading it in as a text file RDD List<String> expected = Arrays.asList("1", "2", "3", "4"); JavaRDD<String> readRDD = sc.textFile(outputDir); assertEquals(expected, readRDD.collect()); }
@Test public void textFilesCompressed() throws IOException { String outputDir = new File(tempDir, "output").getAbsolutePath(); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); rdd.saveAsTextFile(outputDir, DefaultCodec.class); // Try reading it in as a text file RDD List<String> expected = Arrays.asList("1", "2", "3", "4"); JavaRDD<String> readRDD = sc.textFile(outputDir); assertEquals(expected, readRDD.collect()); }
@Test public void textFilesCompressed() throws IOException { String outputDir = new File(tempDir, "output").getAbsolutePath(); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); rdd.saveAsTextFile(outputDir, DefaultCodec.class); // Try reading it in as a text file RDD List<String> expected = Arrays.asList("1", "2", "3", "4"); JavaRDD<String> readRDD = sc.textFile(outputDir); assertEquals(expected, readRDD.collect()); }
@Test public void textFilesCompressed() throws IOException { String outputDir = new File(tempDir, "output").getAbsolutePath(); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); rdd.saveAsTextFile(outputDir, DefaultCodec.class); // Try reading it in as a text file RDD List<String> expected = Arrays.asList("1", "2", "3", "4"); JavaRDD<String> readRDD = sc.textFile(outputDir); assertEquals(expected, readRDD.collect()); }
@Test public void textFiles() throws IOException { String outputDir = new File(tempDir, "output").getAbsolutePath(); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); rdd.saveAsTextFile(outputDir); // Read the plain text file and check it's OK File outputFile = new File(outputDir, "part-00000"); String content = Files.toString(outputFile, StandardCharsets.UTF_8); assertEquals("1\n2\n3\n4\n", content); // Also try reading it in as a text file RDD List<String> expected = Arrays.asList("1", "2", "3", "4"); JavaRDD<String> readRDD = sc.textFile(outputDir); assertEquals(expected, readRDD.collect()); }
// Write the JSON output gzip-compressed in this branch; the else branch below writes it uncompressed.
json_only.saveAsTextFile(outputPath, org.apache.hadoop.io.compress.GzipCodec.class); } else { json_only.saveAsTextFile(outputPath);
@Test public void textFiles() throws IOException { String outputDir = new File(tempDir, "output").getAbsolutePath(); JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); rdd.saveAsTextFile(outputDir); // Read the plain text file and check it's OK File outputFile = new File(outputDir, "part-00000"); String content = Files.toString(outputFile, StandardCharsets.UTF_8); assertEquals("1\n2\n3\n4\n", content); // Also try reading it in as a text file RDD List<String> expected = Arrays.asList("1", "2", "3", "4"); JavaRDD<String> readRDD = sc.textFile(outputDir); assertEquals(expected, readRDD.collect()); }
// Persist the call signs, then print the blank-line count.
// NOTE(review): blankLines.value() looks like a Spark accumulator read — confirm it was
// registered and incremented upstream before this action runs.
callSigns.saveAsTextFile(outputDir + "/callsigns"); System.out.println("Blank lines: "+ blankLines.value());
// Save both RDDs to fixed output paths ("/output/1" and "/output/4").
// NOTE(review): hard-coded paths will fail if the directories already exist — confirm cleanup.
lines.saveAsTextFile("/output/1"); partitions.saveAsTextFile("/output/4");
// Write the lines RDD to a fixed output path; fails if "/output/1" already exists.
lines.saveAsTextFile("/output/1");
// Save the raw lines and the partition-level RDD to separate fixed output paths.
lines.saveAsTextFile("/output/1"); partitions.saveAsTextFile("/output/4");
// Write the records RDD as text to a fixed output path.
records.saveAsTextFile("/output/2");
// Persist the lines RDD to the hard-coded "/output/1" directory.
lines.saveAsTextFile("/output/1");
// Write the biosets RDD under the t-test output directory.
biosets.saveAsTextFile("/ttest/output/1");
// Save the records RDD to its fixed output location.
records.saveAsTextFile("/output/2");
// Persist the biosets RDD under the t-test output directory.
biosets.saveAsTextFile("/ttest/output/1");
// Write dataset R, then load dataset S as a text file and write it alongside,
// under sibling subdirectories of outputPath.
R.saveAsTextFile(outputPath+"/R"); JavaRDD<String> S = session.read().textFile(datasetS).javaRDD(); S.saveAsTextFile(outputPath+"/S");