/**
 * Window-end callback; marks the window as complete so that emitTuples()
 * (which is gated on this flag) may run again.
 */
@Override
public void endWindow()
{
  super.endWindow();
  flag = true;
}
}
/**
 * Window-start callback; clears the per-window marker that records
 * whether any tuple was processed in this window.
 *
 * @param windowId id of the window that is starting
 */
@Override
public void beginWindow(long windowId)
{
  super.beginWindow(windowId);
  receivedTuples = false;
}
/**
 * Chooses the current output file name before delegating setup to the
 * parent: the very first part keeps the bare prefix, later parts carry
 * a ".N" suffix (the same naming scheme used when the file is rotated).
 *
 * @param context operator context supplied by the engine
 */
@Override
public void setup(Context.OperatorContext context)
{
  if (part == 0) {
    currentFileName = FILE_NAME_PREFIX;
  } else {
    currentFileName = FILE_NAME_PREFIX + "." + part;
  }
  super.setup(context);
}
/**
 * File open callback; remembers the opened path and wraps the parent's
 * input stream in a buffered reader so the file can be read line by line.
 *
 * @param curPath the path to the file just opened
 * @return the underlying input stream opened by the parent class
 * @throws IOException if the parent class fails to open the file
 */
@Override
protected InputStream openFile(Path curPath) throws IOException
{
  LOG.info("openFile: curPath = {}", curPath);
  path = curPath;
  final InputStream stream = super.openFile(path);
  // NOTE(review): InputStreamReader here uses the platform default charset —
  // confirm the input files are encoded that way, or pass an explicit charset.
  br = new BufferedReader(new InputStreamReader(stream));
  return stream;
}
/**
 * File close callback; closes the parent's stream, then the buffered reader
 * wrapped around it, and clears the per-file state.
 *
 * Fix: cleanup of {@code br} and {@code path} now runs in a finally block,
 * so the operator no longer keeps a stale open reader when
 * {@code super.closeFile(is)} throws.
 *
 * @param is file input stream that will imminently be closed
 * @throws IOException if closing the stream or the reader fails
 */
@Override
protected void closeFile(InputStream is) throws IOException
{
  try {
    super.closeFile(is);
  } finally {
    if (br != null) {
      br.close();
      br = null;
    }
    path = null;
  }
}
@Override protected void processTuple(T tuple) { //if (writeFilesFlag) { //} super.processTuple(tuple); }
/**
 * {@inheritDoc}
 *
 * Requests finalization so the output file is committed under the desired
 * name without temporary-part decorations.
 */
@Override
public void endWindow()
{
  if (fileName != null) {
    requestFinalize(fileName);
  }
  super.endWindow();
}
/**
 * Wires an S3-to-HDFS copy pipeline: an S3 input module feeding an HDFS
 * file-copy module over file-metadata, block-metadata, and block-data
 * streams. The block streams keep the THREAD_LOCAL locality configured
 * for this application.
 *
 * @param dag  the DAG being populated
 * @param conf application configuration
 */
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  final S3InputModule input = dag.addModule("S3InputModule", new S3InputModule());
  final HDFSFileCopyModule output = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule());

  dag.addStream("FileMetaData", input.filesMetadataOutput, output.filesMetadataInput);
  dag.addStream("BlocksMetaData", input.blocksMetadataOutput, output.blocksMetadataInput)
      .setLocality(Locality.THREAD_LOCAL);
  dag.addStream("BlocksData", input.messages, output.blockData)
      .setLocality(Locality.THREAD_LOCAL);
}
/**
 * Window-start callback; resets the per-window error and record counters.
 *
 * @param windowId id of the window that is starting
 */
@Override
public void beginWindow(long windowId)
{
  super.beginWindow(windowId);
  errorCount = 0;
  recordCount = 0;
}
/**
 * Emission callback; only delegates to the parent while the gate flag is
 * set (the flag is managed by the window callbacks of this operator).
 */
@Override
public void emitTuples()
{
  if (!flag) {
    return;
  }
  super.emitTuples();
}
/**
 * Operator setup; initializes the result accumulators after delegating
 * to the parent.
 *
 * @param context operator context supplied by the engine
 */
@Override
public void setup(Context.OperatorContext context)
{
  super.setup(context);
  // Fresh, empty accumulators for results collected during processing.
  exactlyList = new ArrayList<>();
  atLeastList = new ArrayList<>();
}
/**
 * Operator setup; parses the configured Parquet schema string once so it
 * can be reused for every file that is read.
 *
 * @param context operator context supplied by the engine
 */
@Override
public void setup(OperatorContext context)
{
  super.setup(context);
  schema = MessageTypeParser.parseMessageType(parquetSchema);
}
/**
 * Wires an HDFS-to-S3 copy pipeline: a filesystem input module feeding an
 * S3 output module over file-metadata, block-metadata, and block-data
 * streams, with CONTAINER_LOCAL locality on the block streams.
 *
 * @param dag  the DAG being populated
 * @param conf application configuration
 */
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  final FSInputModule reader = dag.addModule("HDFSInputModule", new FSInputModule());
  final S3OutputModule writer = dag.addModule("S3OutputModule", new S3OutputModule());

  dag.addStream("FileMetaData", reader.filesMetadataOutput, writer.filesMetadataInput);
  dag.addStream("BlocksMetaData", reader.blocksMetadataOutput, writer.blocksMetadataInput)
      .setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("BlocksData", reader.messages, writer.blockData)
      .setLocality(DAG.Locality.CONTAINER_LOCAL);
}
}
/**
 * Operator setup; derives the output file name from the application name
 * before delegating to the parent.
 *
 * @param context operator context supplied by the engine
 */
@Override
public void setup(OperatorContext context)
{
  final String appName = context.getValue(DAGContext.APPLICATION_NAME);
  contextId = appName;
  // NOTE(review): File.separator is platform-dependent ("\" on Windows);
  // if this path is consumed by HDFS a literal "/" may be intended — confirm.
  outputFileName = File.separator + appName + File.separator + "transactions.out.part";
  super.setup(context);
}
/**
 * File open callback; wraps the parent's input stream in a buffered reader
 * so the file can be read line by line.
 *
 * @param path path of the file being opened
 * @return the underlying input stream opened by the parent class
 * @throws IOException if the parent class fails to open the file
 */
@Override
protected InputStream openFile(Path path) throws IOException
{
  final InputStream stream = super.openFile(path);
  // NOTE(review): uses the platform default charset — confirm file encoding.
  br = new BufferedReader(new InputStreamReader(stream));
  return stream;
}
/**
 * File close callback; closes the parent's stream and then the buffered
 * reader wrapped around it.
 *
 * Fix: the reader is now closed in a finally block, so it is not leaked
 * when {@code super.closeFile(is)} throws.
 *
 * @param is file input stream that will imminently be closed
 * @throws IOException if closing the stream or the reader fails
 */
@Override
protected void closeFile(InputStream is) throws IOException
{
  try {
    super.closeFile(is);
  } finally {
    if (br != null) {
      br.close();
      br = null;
    }
  }
}
/**
 * Per-tuple callback; delegates to the parent and then records that at
 * least one tuple arrived in the current window (presumably consulted by
 * the window-end logic to decide whether to finalize/rotate — confirm).
 *
 * @param tuple key/value pair to process
 */
@Override protected void processTuple(KeyValPair<String, Integer> tuple) { super.processTuple(tuple); receivedTuples = true; }
@Override public void endWindow() { super.endWindow(); //request for finalization if there is no input. This is done automatically if the file is rotated periodically // or has a size threshold. if (!receivedTuples && !endOffsets.isEmpty()) { requestFinalize(currentFileName); part++; currentFileName = FILE_NAME_PREFIX + "." + part; } } }
/**
 * Operator setup; caches the operator id assigned by the engine.
 *
 * @param context operator context supplied by the engine
 */
@Override
public void setup(OperatorContext context)
{
  super.setup(context);
  id = context.getId();
}
/**
 * File open callback; opens the file via the parent class and builds a
 * {@code ParquetReader} over the same path using a {@code GroupReadSupport}
 * initialized with the configured schema.
 *
 * @param path path of the file being opened
 * @return the raw input stream opened by the parent class
 * @throws IOException if the file cannot be opened
 */
@Override
protected InputStream openFile(Path path) throws IOException
{
  final InputStream rawStream = super.openFile(path);
  final GroupReadSupport groupReadSupport = new GroupReadSupport();
  groupReadSupport.init(configuration, null, schema);
  reader = new ParquetReader<>(path, groupReadSupport);
  return rawStream;
}