/**
 * Sets the number of columns into the given configuration.
 *
 * @param conf configuration instance which needs the column number set
 * @param columnNum column number for RCFile's Writer; must be positive
 * @throws IllegalArgumentException if {@code columnNum} is not positive
 */
public static void setColumnNumber(Configuration conf, int columnNum) {
    // Validate explicitly: 'assert' is a no-op unless the JVM runs with -ea,
    // so an invalid column count could previously slip through silently.
    if (columnNum <= 0) {
        throw new IllegalArgumentException("columnNum must be positive, got: " + columnNum);
    }
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
}
/** {@inheritDoc} */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(
        FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    // Resolve the destination file inside the task's work output directory.
    Path workDir = getWorkOutputPath(job);
    FileSystem destinationFs = workDir.getFileSystem(job);
    Path destination = new Path(workDir, name);

    // Pick up the configured compressor only when output compression is on.
    CompressionCodec compressionCodec = null;
    if (getCompressOutput(job)) {
        Class<?> compressorClass = getOutputCompressorClass(job, DefaultCodec.class);
        compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(compressorClass, job);
    }

    final RCFile.Writer writer =
            new RCFile.Writer(destinationFs, job, destination, progress, compressionCodec);

    // Adapt the RCFile writer to the MapReduce RecordWriter contract;
    // the key is ignored and only the column values are appended.
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {
        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value)
                throws IOException {
            writer.append(value);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
/**
 * Builds a legacy Hive record writer for the given output file, compressed
 * with the requested codec when one is configured.
 */
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException {
    // Minimal job configuration: only the compression codec (if any) is set.
    JobConf configuration = new JobConf(false);
    Optional<String> codec = compression.getCodecName();
    codec.ifPresent(codecClassName -> configuration.set(COMPRESS_CODEC, codecClassName));

    boolean compressed = codec.isPresent();
    return new RCFileOutputFormat().getHiveRecordWriter(
            configuration,
            new Path(outputFile.toURI()),
            Text.class,
            compressed,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
@Test(enabled = false) public void testRcBinaryPageSource() throws Exception { HiveOutputFormat<?, ?> outputFormat = new RCFileOutputFormat(); InputFormat<?, ?> inputFormat = new RCFileInputFormat<>(); @SuppressWarnings("deprecation") SerDe serde = new LazyBinaryColumnarSerDe(); File file = File.createTempFile("presto_test", "rc-binary"); file.delete(); try { FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null, TEST_COLUMNS, NUM_ROWS); testPageSourceFactory(new RcFilePageSourceFactory(TYPE_MANAGER), split, inputFormat, serde, TEST_COLUMNS); } finally { //noinspection ResultOfMethodCallIgnored file.delete(); } }
@Test(enabled = false) public void testRcTextPageSource() throws Exception { HiveOutputFormat<?, ?> outputFormat = new RCFileOutputFormat(); InputFormat<?, ?> inputFormat = new RCFileInputFormat<>(); @SuppressWarnings("deprecation") SerDe serde = new ColumnarSerDe(); File file = File.createTempFile("presto_test", "rc-binary"); file.delete(); try { FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null, TEST_COLUMNS, NUM_ROWS); testPageSourceFactory(new RcFilePageSourceFactory(TYPE_MANAGER), split, inputFormat, serde, TEST_COLUMNS); } finally { //noinspection ResultOfMethodCallIgnored file.delete(); } }
// Record how many columns each row group will contain before opening the writer.
RCFileOutputFormat.setColumnNumber(jc, cols.length);
// Create the RCFile writer for the final output path, honoring the job's
// compression flag and reporting progress as rows are written.
final RCFile.Writer outWriter = Utilities.createRCFileWriter(jc, finalOutPath.getFileSystem(jc), finalOutPath, isCompressed, progress);
/** {@inheritDoc} */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(
        FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    // Resolve the destination file inside the task's work output directory.
    Path workDir = getWorkOutputPath(job);
    FileSystem destinationFs = workDir.getFileSystem(job);
    Path destination = new Path(workDir, name);

    // Pick up the configured compressor only when output compression is on.
    CompressionCodec compressionCodec = null;
    if (getCompressOutput(job)) {
        Class<?> compressorClass = getOutputCompressorClass(job, DefaultCodec.class);
        compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(compressorClass, job);
    }

    final RCFile.Writer writer =
            new RCFile.Writer(destinationFs, job, destination, progress, compressionCodec);

    // Adapt the RCFile writer to the MapReduce RecordWriter contract;
    // the key is ignored and only the column values are appended.
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {
        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value)
                throws IOException {
            writer.append(value);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
/**
 * Builds a legacy Hive record writer for the given output file, compressed
 * with the requested codec when one is configured.
 */
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException {
    // Minimal job configuration: only the compression codec (if any) is set.
    JobConf configuration = new JobConf(false);
    Optional<String> codec = compression.getCodecName();
    codec.ifPresent(codecClassName -> configuration.set(COMPRESS_CODEC, codecClassName));

    boolean compressed = codec.isPresent();
    return new RCFileOutputFormat().getHiveRecordWriter(
            configuration,
            new Path(outputFile.toURI()),
            Text.class,
            compressed,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
@Test public void testRCBinary() throws Exception { List<TestColumn> testColumns = ImmutableList.copyOf(filter(TEST_COLUMNS, testColumn -> { // RC file does not support complex type as key of a map return !testColumn.getName().equals("t_map_null_key_complex_key_value"); })); HiveOutputFormat<?, ?> outputFormat = new RCFileOutputFormat(); InputFormat<?, ?> inputFormat = new RCFileInputFormat<>(); @SuppressWarnings("deprecation") SerDe serde = new LazyBinaryColumnarSerDe(); File file = File.createTempFile("presto_test", "rc-binary"); try { FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null, testColumns, NUM_ROWS); testCursorProvider(new ColumnarBinaryHiveRecordCursorProvider(), split, inputFormat, serde, testColumns, NUM_ROWS); testCursorProvider(new GenericHiveRecordCursorProvider(), split, inputFormat, serde, testColumns, NUM_ROWS); } finally { //noinspection ResultOfMethodCallIgnored file.delete(); } }
// Record the number of output columns in the configuration for the RCFile writer.
RCFileOutputFormat.setColumnNumber(conf, columns);
/** {@inheritDoc} */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(
        FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    // Resolve the destination file inside the task's work output directory.
    Path workDir = getWorkOutputPath(job);
    FileSystem destinationFs = workDir.getFileSystem(job);
    Path destination = new Path(workDir, name);

    // Pick up the configured compressor only when output compression is on.
    CompressionCodec compressionCodec = null;
    if (getCompressOutput(job)) {
        Class<?> compressorClass = getOutputCompressorClass(job, DefaultCodec.class);
        compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(compressorClass, job);
    }

    final RCFile.Writer writer =
            new RCFile.Writer(destinationFs, job, destination, progress, compressionCodec);

    // Adapt the RCFile writer to the MapReduce RecordWriter contract;
    // the key is ignored and only the column values are appended.
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {
        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value)
                throws IOException {
            writer.append(value);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
/**
 * Builds a legacy Hive record writer for the given output file, compressed
 * with the requested codec when one is configured.
 */
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException {
    // Minimal job configuration: only the compression codec (if any) is set.
    JobConf configuration = new JobConf(false);
    Optional<String> codec = compression.getCodecName();
    codec.ifPresent(codecClassName -> configuration.set(COMPRESS_CODEC, codecClassName));

    boolean compressed = codec.isPresent();
    return new RCFileOutputFormat().getHiveRecordWriter(
            configuration,
            new Path(outputFile.toURI()),
            Text.class,
            compressed,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
@Test public void testRCText() throws Exception { List<TestColumn> testColumns = ImmutableList.copyOf(filter(TEST_COLUMNS, testColumn -> { return !testColumn.getName().equals("t_struct_null") // TODO: This is a bug in the RC text reader && !testColumn.getName().equals("t_map_null_key_complex_key_value"); // RC file does not support complex type as key of a map })); HiveOutputFormat<?, ?> outputFormat = new RCFileOutputFormat(); InputFormat<?, ?> inputFormat = new RCFileInputFormat<>(); @SuppressWarnings("deprecation") SerDe serde = new ColumnarSerDe(); File file = File.createTempFile("presto_test", "rc-text"); try { FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null, testColumns, NUM_ROWS); testCursorProvider(new ColumnarTextHiveRecordCursorProvider(), split, inputFormat, serde, testColumns, NUM_ROWS); testCursorProvider(new GenericHiveRecordCursorProvider(), split, inputFormat, serde, testColumns, NUM_ROWS); } finally { //noinspection ResultOfMethodCallIgnored file.delete(); } }
// Record how many columns each row group will contain before opening the writer.
RCFileOutputFormat.setColumnNumber(jc, cols.length);
// Create the RCFile writer for the final output path, honoring the job's
// compression flag and reporting progress as rows are written.
final RCFile.Writer outWriter = Utilities.createRCFileWriter(jc, finalOutPath.getFileSystem(jc), finalOutPath, isCompressed, progress);
/** {@inheritDoc} */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(
        FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    Path workDir = getWorkOutputPath(job);
    FileSystem destinationFs = workDir.getFileSystem(job);

    // Make sure the work directory exists before the writer opens its file.
    if (!destinationFs.exists(workDir)) {
        destinationFs.mkdirs(workDir);
    }
    Path destination = new Path(workDir, name);

    // Pick up the configured compressor only when output compression is on.
    CompressionCodec compressionCodec = null;
    if (getCompressOutput(job)) {
        Class<?> compressorClass = getOutputCompressorClass(job, DefaultCodec.class);
        compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(compressorClass, job);
    }

    final RCFile.Writer writer =
            new RCFile.Writer(destinationFs, job, destination, progress, compressionCodec);

    // Adapt the RCFile writer to the MapReduce RecordWriter contract;
    // the key is ignored and only the column values are appended.
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {
        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value)
                throws IOException {
            writer.append(value);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
/**
 * Builds a legacy Hive record writer for the given output file, compressed
 * with the requested codec when one is configured.
 */
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException {
    // Minimal job configuration: only the compression codec (if any) is set.
    JobConf configuration = new JobConf(false);
    Optional<String> codec = compression.getCodecName();
    codec.ifPresent(codecClassName -> configuration.set(COMPRESS_CODEC, codecClassName));

    boolean compressed = codec.isPresent();
    return new RCFileOutputFormat().getHiveRecordWriter(
            configuration,
            new Path(outputFile.toURI()),
            Text.class,
            compressed,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
// Copy the codec and column count from the input file's key header so the
// output is written with the same layout as the file being read.
codec = key.getCodec();
columnNumber = key.getKeyBuffer().getColumnNumber();
RCFileOutputFormat.setColumnNumber(jc, columnNumber);
// NOTE(review): progress reporter is null here — presumably progress is
// tracked elsewhere; confirm none is needed for this writer.
outWriter = new RCFile.Writer(fs, jc, getOutPath(), null, codec);
// Copy the codec and column count from the input file's key header so the
// output is written with the same layout as the file being read.
codec = key.getCodec();
columnNumber = key.getKeyBuffer().getColumnNumber();
RCFileOutputFormat.setColumnNumber(jc, columnNumber);
// NOTE(review): progress reporter is null here — presumably progress is
// tracked elsewhere; confirm none is needed for this writer.
outWriter = new RCFile.Writer(fs, jc, outPath, null, codec);
// Start from a clean slate: remove any file left over from a previous run.
fs.delete(testFile, true);
// Work on a copy of the configuration so the caller's conf is not mutated.
Configuration cloneConf = new Configuration(conf);
RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
// Force a row-group boundary every intervalRecordCount records.
cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
// One column per expected field; the writer needs the width up front.
RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
// Open the writer with an explicit DefaultCodec and no progress reporting.
RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());