// Fragment (incomplete view): maps a Hive primitive Writable to its HBase
// byte[] encoding via Bytes.toBytes, dispatching on the ObjectInspector's
// primitive category. The switch (remaining cases, default, and the enclosing
// method) continues past this view.
switch (poi.getPrimitiveCategory()) {
case INT:
  return Bytes.toBytes(((IntWritable) writable).get());
case BOOLEAN:
  return Bytes.toBytes(((BooleanWritable) writable).get());
case LONG:
  return Bytes.toBytes(((LongWritable) writable).get());
case FLOAT:
  return Bytes.toBytes(((FloatWritable) writable).get());
case DOUBLE:
  return Bytes.toBytes(((DoubleWritable) writable).get());
case SHORT:
  return Bytes.toBytes(((ShortWritable) writable).get());
case STRING:
  // Strings are encoded as their UTF-8 bytes via Bytes.toBytes(String).
  return Bytes.toBytes(((Text) writable).toString());
case BYTE:
  return Bytes.toBytes(((ByteWritable) writable).get());
private Writable allocateWritable(Type type) { switch (type) { case BYTE: return new ByteWritable(); case BOOL: return new BooleanWritable(); case INT: return new IntWritable(); case SHORT: return new ShortWritable(); case LONG: return new LongWritable(); case FLOAT: return new FloatWritable(); case DOUBLE: return new DoubleWritable(); case STRING: return new Text(); default: assert false; // not supported } return null; }
/**
 * Tokenizes each input line and emits every whitespace-delimited token to both
 * configured outputs: "out1" keyed by a running index, "out2" keyed by the
 * token itself. The index field advances once per token across all lines.
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // StringTokenizer skips runs of whitespace, so no empty tokens are emitted.
  for (StringTokenizer tokens = new StringTokenizer(value.toString());
      tokens.hasMoreTokens(); ) {
    word.set(tokens.nextToken());
    MultiOutputFormat.write("out1", index, word, context);
    MultiOutputFormat.write("out2", word, index, context);
    index.set(index.get() + 1);
  }
}
}
/**
 * Buckets the record's key into five partitions by replacing it with
 * key mod 5; the value part of the tuple is passed through untouched.
 */
@Override
public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v) throws Exception {
  final int bucket = v.f0.get() % 5;
  v.f0 = new IntWritable(bucket);
  return v;
}
}
/**
 * Re-keys every incoming value under (key mod 4) and forwards the value
 * unchanged, fanning all keys into four buckets.
 */
@Override
public void reduce(IntWritable k, Iterator<IntWritable> v,
    OutputCollector<IntWritable, IntWritable> out, Reporter r) throws IOException {
  // The bucket is fixed for the whole group; compute it once per value below
  // to preserve the original per-record IntWritable allocation (collectors may
  // retain references).
  final int bucket = k.get() % 4;
  while (v.hasNext()) {
    out.collect(new IntWritable(bucket), v.next());
  }
}
// Fragment (incomplete view): stitched excerpts from a MultiOutputFormat test —
// registers a third (null) output, points out1/out2 under the job's work dir,
// then reads back out2's sequence-file output and checks it against the text
// output. The loop around the trailing asserts is outside this view.
configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, IntWritable.class);
Path outDir = new Path(workDir.getPath(), job.getJobName());
FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));
// NOTE(review): reducer output file name — assumes a single reduce task.
Path seqOutPath = new Path(outDir, "out2/part-r-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
Text key = new Text();
IntWritable value = new IntWritable();
String[] words = "Hello World".split(" ");
Assert.assertEquals(words.length, textOutput.length);
// NOTE(review): `i` is a loop variable from an enclosing for-loop outside this view.
Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]);
reader.next(key, value);
Assert.assertEquals(words[i], key.toString());
Assert.assertEquals((i + 2), value.get());
// Fragment (incomplete view): stitched excerpts from an ORC output-format test —
// sets up per-test input/output paths, writes random rows, runs the job, then
// reads the ORC result back and checks the first row's values.
int rownum=1000;
Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt");
Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
localFs.delete(outputPath, true);
PrintWriter pw = new PrintWriter( new OutputStreamWriter(localFs.create(inputPath)));
// Fixed seed keeps the generated test data deterministic across runs.
Random r = new Random(1000L);
boolean firstRow = true;
assertTrue(result);
// Map-only job output: first mapper's part file.
Path outputFilePath = new Path(outputPath, "part-m-00000");
assertTrue(localFs.exists(outputFilePath));
Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs));
// NOTE(review): the next line is a cut-off argument list from the original file.
soi.getAllStructFieldRefs().get(1));
assertEquals(intWritable.get(), firstIntValue);
assertEquals(text.toString(), firstStringValue);
localFs.delete(outputPath, true);
// Fragment (incomplete view): MultiOutputFormat test tail — registers out2 as a
// sequence-file output, sets both output directories, then verifies the text
// output (out1) and the sequence-file output (out2) word-by-word. The loop
// around the trailing asserts is outside this view.
configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
Path outDir = new Path(workDir.getPath(), job.getJobName());
FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));
// NOTE(review): mapper output file name — looks like a map-only job; confirm.
Path seqOutPath = new Path(outDir, "out2/part-m-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
Text key = new Text();
IntWritable value = new IntWritable();
String[] words = fileContent.split(" ");
Assert.assertEquals(words.length, textOutput.length);
// NOTE(review): `i` is a loop variable from an enclosing for-loop outside this view.
Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
reader.next(key, value);
Assert.assertEquals(words[i], key.toString());
Assert.assertEquals((i + 1), value.get());
// Fragment (incomplete view): stitched excerpts from a dictionary/statistics
// merge step. The method header (before `throws`) is outside this view, and
// several if/else blocks below are cut off mid-way — braces do not balance here.
throws IOException, InterruptedException {
int index = key.get();
// An empty path marks a column whose merged dictionary is absent.
String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();
context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath));
conf = HadoopUtil.getCurrentConfiguration();
reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
// Instantiate key/value holders matching whatever classes the file was written with.
LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
// NOTE(review): keyW presumably identifies a cuboid and 0L looks like a
// sentinel record — confirm against the writer side.
if (keyW.get() == 0L) {
if (cuboidHLLMap.get(keyW.get()) != null) {
// Merge this partition's HyperLogLog counter into the running per-cuboid estimate.
cuboidHLLMap.get(keyW.get()).merge(hll);
} else {
CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap, averageSamplingPercentage);
// NOTE(review): the next line is a cut-off constructor argument list in the original.
Path statisticsFilePath = new Path(statOutputPath,
FSDataInputStream fis = fs.open(statisticsFilePath);
context.write(new IntWritable(-1), new Text(""));
// Fragment (incomplete view): writes N-1 random ("cat<i>", int) pairs to a
// SequenceFile. The writer creation and the try's close/finally are outside
// this view — as shown here, `writer` is still null when append is called, so
// the assignment must happen in the missing part.
FileSystem fs = FileSystem.get(URI.create(uri), conf);
Path path = new Path(uri);
Text key = new Text();
IntWritable value = new IntWritable();
SequenceFile.Writer writer = null;
try {
for (int i = 1; i < N; i++) {
int randomInt = randomNumberGenerator.nextInt(100);
key.set("cat" + i);
value.set(randomInt);
// Echo each record before appending it.
System.out.printf("%s\t%s\n", key, value);
writer.append(key, value);
// Fragment (incomplete view): stitched excerpts from an ORC ACID record-updater
// constructor — resolves the delta file name, lazily resolves the filesystem,
// stamps the directory with an ACID format marker, and opens the side file that
// tracks flushed lengths. Several blocks are cut off mid-way; braces do not
// balance here.
AcidUtils.getAcidOperationalProperties(options.getConfiguration());
this.bucket.set(options.getBucket());
this.path = AcidUtils.createFilename(path, options);
this.deleteEventWriter = null;
FileSystem fs = options.getFilesystem();
if (fs == null) {
fs = path.getFileSystem(options.getConfiguration());
Path formatFile = new Path(path, ACID_FORMAT);
if(!fs.exists(formatFile)) {
// create(formatFile, false) fails if the marker already exists, so the
// version stamp is written at most once per directory.
try (FSDataOutputStream strm = fs.create(formatFile, false)) {
strm.writeInt(ORC_ACID_VERSION);
} catch (IOException ioe) {
// NOTE(review): side file presumably records flushed lengths so readers can
// see committed data mid-stream — confirm against OrcAcidUtils.
flushLengths = fs.create(OrcAcidUtils.getSideFile(this.path), true, 8, options.getReporter());
} else {
// Fragment (incomplete view): populates one ACID ORC row inside an enclosing
// loop (`r` is the loop variable, defined outside this view) and appends it.
// Fields 0-5 presumably follow the ACID column layout (operation, original
// write id, bucket, rowId, current write id, row payload) — TODO confirm
// against the file schema.
row.setFieldValue(0, new IntWritable(0));
row.setFieldValue(1, new LongWritable(1));
row.setFieldValue(2, new IntWritable(0));
LongWritable rowId = new LongWritable();
row.setFieldValue(3, rowId);
row.setFieldValue(4, new LongWritable(1));
// Nested payload struct with three fields: an int, a one-field struct, a Text.
OrcStruct rowField = new OrcStruct(3);
row.setFieldValue(5, rowField);
IntWritable a = new IntWritable();
rowField.setFieldValue(0, a);
OrcStruct b = new OrcStruct(1);
rowField.setFieldValue(1, b);
IntWritable c = new IntWritable();
b.setFieldValue(0, c);
Text d = new Text();
rowField.setFieldValue(2, d);
// All field values are derived deterministically from r.
rowId.set(r);
a.set(r * 42);
c.set(r * 10001);
d.set(Integer.toHexString(r));
indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION, 1, 0, rowId.get());
writer.addRow(row);
// Fragment (incomplete view): stitched excerpts from an ACID ORC read test —
// lays out base_00100/bucket_00000, builds a schema (the fromString literal is
// cut off mid-string), writes data, then asserts on record fields. The assert
// group appears twice in the original excerpt, presumably for two records.
Path baseDir = new Path(workDir, "base_00100");
testFilePath = new Path(baseDir, "bucket_00000");
fs.mkdirs(baseDir);
fs.delete(testFilePath, true);
TypeDescription fileSchema = TypeDescription.fromString("struct<operation:int," +
// NOTE(review): the next expression is orphaned — it belongs elsewhere in the
// original file and does not continue the schema string.
ByteBuffer.wrap("0,0,999".getBytes(StandardCharsets.UTF_8)));
writer.close();
long fileLength = fs.getFileStatus(testFilePath).getLen();
assertEquals("writeid " + record, 1, id.getWriteId());
assertEquals("a " + record, 42 * record, ((IntWritable) struct.getFieldValue(0)).get());
assertEquals(null, struct.getFieldValue(1));
// NOTE(review): cut-off argument list — the expected "d" value is outside this view.
assertEquals("d " + record,
assertEquals("writeid " + record, 1, id.getWriteId());
assertEquals("a " + record, 42 * record, ((IntWritable) struct.getFieldValue(0)).get());
assertEquals(null, struct.getFieldValue(1));
assertEquals("d " + record,
/**
 * Computes the number of distinct terms in a (possibly sharded) dictionary.
 *
 * Scans every sequence file matching {@code dictionaryPath} (Text key,
 * IntWritable term id) and tracks the maximum term id; term ids are assumed to
 * be dense starting at 0, so the count is maxTermId + 1.
 *
 * @param conf Hadoop configuration used to open the files
 * @param dictionaryPath glob pattern of dictionary sequence files
 * @return the number of terms, or 0 if no file matches / all files are empty
 * @throws IOException if a dictionary file cannot be read
 */
private static int getNumTerms(Configuration conf, Path dictionaryPath) throws IOException {
  FileSystem fs = dictionaryPath.getFileSystem(conf);
  Text key = new Text();
  IntWritable value = new IntWritable();
  int maxTermId = -1;
  FileStatus[] shards = fs.globStatus(dictionaryPath);
  // Fix: globStatus returns null when nothing matches — previously an NPE.
  if (shards != null) {
    for (FileStatus stat : shards) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, stat.getPath(), conf);
      try {
        while (reader.next(key, value)) {
          maxTermId = Math.max(maxTermId, value.get());
        }
      } finally {
        // Fix: the reader was never closed, leaking a stream per shard.
        reader.close();
      }
    }
  }
  return maxTermId + 1;
}
// Fragment (incomplete view): ORC input-format test setup — the first line is a
// cut-off argument list; then a mock execution environment is built, a small
// ORC file is written under the mapred input dir, and (further down) 20 rows
// are read back, checking the first int field equals the row index.
ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "combination", inspector, false, 1);
Path partDir = new Path(conf.get("mapred.input.dir"));
Writer writer = OrcFile.createWriter(new Path(partDir, "0_0"),
OrcFile.writerOptions(conf).blockPadding(false)
.bufferSize(1024).inspector(inspector));
// NOTE(review): `reader`, `key`, `value` are defined outside this view.
for(int i=0; i < 20; i++) {
assertEquals(true, reader.next(key, value));
assertEquals(i, ((IntWritable) value.getFieldValue(0)).get());
// Loads a dictionary sequence file (Text word -> IntWritable id) into an
// in-memory id -> word map.
// NOTE(review): the path is a literal placeholder ("<path do dictionary>") and
// must be replaced with the real dictionary location before running.
SequenceFile.Reader read = new SequenceFile.Reader(fs, new Path("<path do dictionary>"), conf);
IntWritable dicKey = new IntWritable();
Text text = new Text();
// Fix: diamond operator instead of a raw HashMap.
HashMap<Integer, String> dictionaryMap = new HashMap<>();
try {
  while (read.next(text, dicKey)) {
    // Fix: read the int directly instead of Integer.parseInt(dicKey.toString()).
    dictionaryMap.put(dicKey.get(), text.toString());
  }
} finally {
  // Fix: close even if reading throws (was only closed on the success path).
  read.close();
}
/** Parses a tab-separated "id&lt;TAB&gt;text" line and emits (id, text). */
public void map (final LongWritable key, final Text value,
    final OutputCollector<IntWritable, Text> output, final Reporter reporter)
    throws IOException {
  final String[] fields = value.toString().split("\t");
  // Column 0 is the numeric key, column 1 the payload text.
  final IntWritable outKey = new IntWritable(Integer.parseInt(fields[0]));
  final Text outValue = new Text(fields[1]);
  output.collect(outKey, outValue);
}
}
// Fragment (incomplete view): tail of a try-with-resources header opening a
// SequenceFile via the Reader.file(...) option API; the try keyword and the
// reader body are outside this view.
SequenceFile.Reader.file(new Path(fileName)))) {
Text key = new Text();
IntWritable val = new IntWritable();
/**
 * Creates (replacing any existing file) a sequence file of rowCount
 * (IntWritable i, Text "line i") pairs.
 *
 * NOTE(review): the method declares IOException yet swallows it in the catch
 * block — that behavior is preserved for caller compatibility, but the writer
 * is now closed in a finally so a failed append no longer leaks it.
 *
 * @param fs filesystem to write to
 * @param file destination path (deleted first if present)
 * @param rowCount number of records to write
 */
private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
  Configuration conf = new Configuration();
  try {
    if (fs.exists(file)) {
      fs.delete(file, false);
    }
    SequenceFile.Writer w =
        SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
    try {
      for (int i = 0; i < rowCount; i++) {
        w.append(new IntWritable(i), new Text("line " + i));
      }
    } finally {
      // Fix: close in finally so a mid-write exception does not leak the writer.
      w.close();
    }
    System.out.println("done");
  } catch (IOException e) {
    // Preserved behavior: errors are reported, not propagated.
    e.printStackTrace();
  }
}