@Override
public void process(InputStream in, OutputStream out) throws IOException {
    // Convert a stream of JSON records into an Avro data file. Bad records are
    // collected rather than failing the whole conversion.
    try (JSONFileReader<Record> reader = new JSONFileReader<>(in, schema, Record.class)) {
        reader.initialize();
        try (DataFileWriter<Record> avroWriter = writer.create(schema, out)) {
            while (reader.hasNext()) {
                try {
                    avroWriter.append(reader.next());
                    written.incrementAndGet();
                } catch (DatasetRecordException e) {
                    // Skip the malformed record but keep the failure for reporting.
                    failures.add(e);
                }
            }
        }
    }
} });
@Override
public void process(InputStream in, OutputStream out) throws IOException {
    // Convert CSV input (parsed per the configured CSVProperties) into an Avro
    // data file. Bad records are collected rather than failing the conversion.
    try (CSVFileReader<Record> reader = new CSVFileReader<>(in, props, schema, Record.class)) {
        reader.initialize();
        try (DataFileWriter<Record> avroWriter = writer.create(schema, out)) {
            while (reader.hasNext()) {
                try {
                    avroWriter.append(reader.next());
                    written.incrementAndGet();
                } catch (DatasetRecordException e) {
                    // Skip the malformed record but keep the failure for reporting.
                    failures.add(e);
                }
            }
        }
    }
} });
@Override
public void process(InputStream in) throws IOException {
    // Infer an Avro schema from the incoming CSV stream and publish its string
    // form (optionally pretty-printed) through the shared avroSchema holder.
    String recordName = context.getProperty(RECORD_NAME)
        .evaluateAttributeExpressions(inputFlowFile).getValue();
    Schema inferred = CSVUtil.inferSchema(recordName, in, props);
    avroSchema.set(inferred.toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
} });
// Build the CSV parsing options from processor properties, evaluating NiFi
// expression language against the incoming flow file's attributes.
// NOTE(review): header/hasHeader come from holders populated elsewhere — confirm
// they are set before this statement runs.
CSVProperties props = new CSVProperties.Builder()
    .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(inputFlowFile).getValue())
    .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(inputFlowFile).getValue())
    .quote(context.getProperty(QUOTE_STRING).evaluateAttributeExpressions(inputFlowFile).getValue())
    .escape(context.getProperty(ESCAPE_STRING).evaluateAttributeExpressions(inputFlowFile).getValue())
    .linesToSkip(context.getProperty(HEADER_LINE_SKIP_COUNT).evaluateAttributeExpressions(inputFlowFile).asInteger())
    .header(header.get())
    .hasHeader(hasHeader.get())
    .build();
public DurableParquetAppender(FileSystem fs, Path path, Schema schema, Configuration conf, CompressionType compressionType) {
    // Pairs the Parquet appender with a Snappy-compressed Avro side file
    // (derived from the Parquet path) written alongside the Parquet output.
    this.fs = fs;
    this.path = path;
    this.schema = schema;
    this.avroPath = avroPath(path);
    this.avroAppender = new AvroAppender<>(fs, avroPath, schema, CompressionType.Snappy);
    this.parquetAppender = new ParquetAppender<>(fs, path, schema, conf, compressionType);
}
@Override
public void signalReady() {
    // Without a signal manager there is nothing to notify; silently do nothing.
    if (signalManager == null) {
        return;
    }
    signalManager.signalReady(getConstraints());
}
@Override
public boolean isReady() {
    // Ready iff a ready timestamp has been recorded for these constraints
    // (-1 marks "no signal yet"); with no signal manager, never ready.
    return signalManager != null
        && signalManager.getReadyTimestamp(getConstraints()) != -1;
} }
/**
 * Imports an existing schema file into this manager. Typically used to bring
 * in schemas written by previous versions of this library.
 *
 * @param schemaPath path of the schema file to import
 * @return the URI of the schema file managed by this manager
 */
public URI importSchema(Path schemaPath) {
    return writeSchema(loadSchema(schemaPath));
}
@Override
public void run() {
    // Resolve a partition view from a file: URI against the dataset.
    String uri = "file:/tmp/datasets/ns/test2/y=2014/m=03/d=14";
    FileSystemDatasets.viewForUri(dataset, uri);
} });
// Advance the reader by one record; the result is discarded. NOTE(review):
// presumably this runnable exists so a caller can assert that next() throws
// in the current reader state — confirm against the enclosing test.
@Override public void run() { reader.next(); } });
public Path getDirectory(Dataset<?> dataset) {
    // Only file-system-backed datasets expose a storage directory.
    if (!(dataset instanceof FileSystemDataset)) {
        return null;
    }
    return ((FileSystemDataset<?>) dataset).getDirectory();
}
protected static PathFilter hidden() {
    // The complement of PathFilters.notHidden(): accept only hidden paths.
    final PathFilter visible = PathFilters.notHidden();
    return new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !visible.accept(path);
        }
    };
} }
// Count entries under the path, discarding the result. NOTE(review): presumably
// only the side effect matters here — e.g. verifying that the read succeeds (or
// fails) for the given FileSystem identity — confirm against the caller.
@Override public Void call() throws IOException { count(fs, path); return null; } });
/**
 * Infers an Avro schema from a sample of CSV data.
 *
 * @param name the record name to use for the inferred schema
 * @param incoming the CSV content to sample
 * @param props CSV parsing options (delimiter, quote, header handling, ...)
 * @param requiredFields names of fields that must be required in the schema
 * @return the inferred Avro {@code Schema}
 * @throws IOException if the incoming stream cannot be read
 */
public static Schema inferSchema(String name, InputStream incoming, CSVProperties props, Set<String> requiredFields) throws IOException {
    // Delegates with the internal flag fixed to false; see inferSchemaInternal
    // for that flag's meaning.
    return inferSchemaInternal(name, incoming, props, requiredFields, false);
}
// Build the CSV parsing options from processor properties, evaluating NiFi
// expression language against the incoming CSV flow file's attributes.
CSVProperties props = new CSVProperties.Builder()
    .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue())
    .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
    .quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue())
    .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue())
    .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean())
    .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger())
    .build();
@Override
public void run() {
    // Resolve a partition view from an hdfs: URI against the dataset.
    String uri = "hdfs:/tmp/datasets/ns/test/y=2014/m=03/d=14";
    FileSystemDatasets.viewForUri(dataset, uri);
} });
// Advance the reader by one record; the result is discarded. NOTE(review):
// presumably this runnable exists so a caller can assert that next() throws
// in the current reader state — confirm against the enclosing test.
@Override public void run() { reader.next(); } });
// Count entries under the path, discarding the result. NOTE(review): presumably
// only the side effect matters here — e.g. verifying that the read succeeds (or
// fails) for the given FileSystem identity — confirm against the caller.
@Override public Void call() throws IOException { count(fs, path); return null; } });