/**
 * Builds an RDD of Cells from the text file referenced by the given configuration.
 * <p>
 * When the configured extractor implementation is {@code ExtractorConstants.HDFS}, the file path is
 * expanded to {@code HDFS_PREFIX + host + ":" + port + path}; otherwise the configured path is used as is.
 *
 * @param config extractor configuration providing the host, port and file path values.
 * @return the Cells RDD created from the resolved file path.
 */
public RDD<Cells> createHDFSRDD(ExtractorConfig<Cells> config) {
    final Serializable hdfsHost = config.getValues().get(ExtractorConstants.HOST);
    final Serializable hdfsPort = config.getValues().get(ExtractorConstants.PORT);
    final Serializable rawPath = config.getValues().get(ExtractorConstants.FS_FILE_PATH);

    // Table metadata is resolved before the path is touched, as in the original ordering.
    final TextFileDataTable tableMetadata = UtilFS.createTextFileMetaDataFromConfig(config, this);

    final String plainPath = rawPath.toString();
    final boolean targetsHdfs = config.getExtractorImplClassName().equals(ExtractorConstants.HDFS);
    final String resolvedPath = targetsHdfs
            ? ExtractorConstants.HDFS_PREFIX + hdfsHost.toString() + ":" + hdfsPort + rawPath.toString()
            : plainPath;

    return createRDDFromFilePath(resolvedPath, tableMetadata);
}
/**
 * Creates a Cells RDD for the text file described by the supplied configuration.
 * <p>
 * The plain configured path is used unless the extractor implementation class name equals
 * {@code ExtractorConstants.HDFS}, in which case an HDFS location is assembled from the
 * configured host, port and path.
 *
 * @param config extractor configuration holding the host, port and file path values.
 * @return RDD of Cells read from the resolved location.
 */
public RDD<Cells> createHDFSRDD(ExtractorConfig<Cells> config) {
    final Serializable hostValue = config.getValues().get(ExtractorConstants.HOST);
    final Serializable portValue = config.getValues().get(ExtractorConstants.PORT);
    final Serializable pathValue = config.getValues().get(ExtractorConstants.FS_FILE_PATH);
    final TextFileDataTable metadata = UtilFS.createTextFileMetaDataFromConfig(config, this);

    final String localPath = pathValue.toString();
    if (!config.getExtractorImplClassName().equals(ExtractorConstants.HDFS)) {
        // Not an HDFS extractor: the configured path is already the final location.
        return createRDDFromFilePath(localPath, metadata);
    }

    final String hdfsLocation =
            ExtractorConstants.HDFS_PREFIX + hostValue.toString() + ":" + portValue + pathValue.toString();
    return createRDDFromFilePath(hdfsLocation, metadata);
}
/**
 * Resolves the {@link TextFileDataTable} metadata for the file described by an extractor configuration.
 * <p>
 * Resolution order:
 * <ol>
 *   <li>a table stored under {@code ExtractorConstants.FS_FILEDATATABLE} is returned untouched;</li>
 *   <li>otherwise, a column list stored under {@code ExtractorConstants.FS_SCHEMA} is combined with the
 *       configured catalog and table names;</li>
 *   <li>otherwise, the table is built by {@code createTextFileFromSchemaFile} from the path produced by
 *       {@code buildFilePath}.</li>
 * </ol>
 * In cases 2 and 3 the configured {@code ExtractorConstants.FS_FILE_SEPARATOR} is applied through
 * {@code setLineSeparator}. The separator, catalog and table values are only read in the branches that
 * use them, so a configuration carrying a ready-made table does not need to define them.
 *
 * @param extractorConfig  configuration whose values describe the file.
 * @param deepSparkContext context handed to {@code createTextFileFromSchemaFile} when a schema file is read.
 * @return the resolved table metadata.
 * @throws NullPointerException if no separator is configured and no ready-made table was supplied.
 */
public static TextFileDataTable createTextFileMetaDataFromConfig(ExtractorConfig<Cells> extractorConfig,
        DeepSparkContext deepSparkContext) {
    final Serializable preBuiltTable = extractorConfig.getValues().get(ExtractorConstants.FS_FILEDATATABLE);
    if (preBuiltTable != null) {
        // A caller-supplied table is returned as is; separator/catalog/table are irrelevant here.
        return (TextFileDataTable) preBuiltTable;
    }

    // Fail fast on a missing separator before any schema file is read.
    final Serializable separator = extractorConfig.getValues().get(ExtractorConstants.FS_FILE_SEPARATOR);
    final String splitSep = separator.toString();

    final Serializable schema = extractorConfig.getValues().get(ExtractorConstants.FS_SCHEMA);
    final TextFileDataTable table;
    if (schema != null) {
        final String catalogName = (String) extractorConfig.getValues().get(ExtractorConstants.CATALOG);
        final String tableName = (String) extractorConfig.getValues().get(ExtractorConstants.TABLE);
        // The configuration stores values as Serializable, so the element type cannot be checked here.
        @SuppressWarnings("unchecked")
        final ArrayList<SchemaMap<?>> columns = (ArrayList<SchemaMap<?>>) schema;
        table = new TextFileDataTable(new TableName(catalogName, tableName), columns);
    } else {
        table = createTextFileFromSchemaFile(buildFilePath(extractorConfig), deepSparkContext);
    }

    table.setLineSeparator(splitSep);
    return table;
}
/**
 * Resolves the {@link TextFileDataTable} metadata for the file described by an extractor configuration.
 * <p>
 * Resolution order:
 * <ol>
 *   <li>a table stored under {@code ExtractorConstants.FS_FILEDATATABLE} is returned untouched;</li>
 *   <li>otherwise, a column list stored under {@code ExtractorConstants.FS_SCHEMA} is combined with the
 *       configured catalog and table names;</li>
 *   <li>otherwise, the table is built by {@code createTextFileFromSchemaFile} from the path produced by
 *       {@code buildFilePath}.</li>
 * </ol>
 * In cases 2 and 3 the configured {@code ExtractorConstants.FS_FILE_SEPARATOR} is applied through
 * {@code setLineSeparator}. The separator, catalog and table values are only read in the branches that
 * use them, so a configuration carrying a ready-made table does not need to define them.
 *
 * @param extractorConfig  configuration whose values describe the file.
 * @param deepSparkContext context handed to {@code createTextFileFromSchemaFile} when a schema file is read.
 * @return the resolved table metadata.
 * @throws NullPointerException if no separator is configured and no ready-made table was supplied.
 */
public static TextFileDataTable createTextFileMetaDataFromConfig(ExtractorConfig<Cells> extractorConfig,
        DeepSparkContext deepSparkContext) {
    final Serializable preBuiltTable = extractorConfig.getValues().get(ExtractorConstants.FS_FILEDATATABLE);
    if (preBuiltTable != null) {
        // A caller-supplied table is returned as is; separator/catalog/table are irrelevant here.
        return (TextFileDataTable) preBuiltTable;
    }

    // Fail fast on a missing separator before any schema file is read.
    final Serializable separator = extractorConfig.getValues().get(ExtractorConstants.FS_FILE_SEPARATOR);
    final String splitSep = separator.toString();

    final Serializable schema = extractorConfig.getValues().get(ExtractorConstants.FS_SCHEMA);
    final TextFileDataTable table;
    if (schema != null) {
        final String catalogName = (String) extractorConfig.getValues().get(ExtractorConstants.CATALOG);
        final String tableName = (String) extractorConfig.getValues().get(ExtractorConstants.TABLE);
        // The configuration stores values as Serializable, so the element type cannot be checked here.
        @SuppressWarnings("unchecked")
        final ArrayList<SchemaMap<?>> columns = (ArrayList<SchemaMap<?>>) schema;
        table = new TextFileDataTable(new TableName(catalogName, tableName), columns);
    } else {
        table = createTextFileFromSchemaFile(buildFilePath(extractorConfig), deepSparkContext);
    }

    table.setLineSeparator(splitSep);
    return table;
}
/**
 * Builds an RDD of Cells from a text file, addressing it on Amazon S3 when the configuration asks for it.
 * <p>
 * If the extractor implementation class name equals {@code ExtractorConstants.S3}, the location becomes
 * {@code S3_PREFIX + bucket + path}; otherwise the configured path is used unchanged. In both cases the
 * s3n filesystem implementation and the configured AWS access key id / secret access key are written to
 * the Hadoop configuration obtained from {@code sc()} before the RDD is created.
 *
 * @param config Amazon S3 extractor configuration.
 * @return RDD of Cells read from the resolved location.
 */
public RDD<Cells> createS3RDD(ExtractorConfig<Cells> config) {
    final Serializable s3Bucket = config.getValues().get(ExtractorConstants.S3_BUCKET);
    final Serializable s3Path = config.getValues().get(ExtractorConstants.FS_FILE_PATH);

    final TextFileDataTable tableMetadata = UtilFS.createTextFileMetaDataFromConfig(config, this);

    final String plainPath = s3Path.toString();
    final String location = config.getExtractorImplClassName().equals(ExtractorConstants.S3)
            ? ExtractorConstants.S3_PREFIX + s3Bucket.toString() + s3Path.toString()
            : plainPath;

    // Register the native S3 filesystem and its credentials before the file is opened.
    final Configuration hadoopSettings = this.sc().hadoopConfiguration();
    hadoopSettings.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
    hadoopSettings.set("fs.s3n.awsAccessKeyId", config.getString(ExtractorConstants.S3_ACCESS_KEY_ID));
    hadoopSettings.set("fs.s3n.awsSecretAccessKey", config.getString(ExtractorConstants.S3_SECRET_ACCESS_KEY));

    return createRDDFromFilePath(location, tableMetadata);
}
/**
 * Creates a Cells RDD for a text file, optionally located in an Amazon S3 bucket.
 * <p>
 * The configured path is prefixed with {@code ExtractorConstants.S3_PREFIX} and the bucket name only when
 * the extractor implementation class name equals {@code ExtractorConstants.S3}. The s3n filesystem class
 * and the AWS credentials taken from the configuration are always set on the Hadoop configuration
 * returned by {@code sc()} before the RDD is built.
 *
 * @param config Amazon S3 extractor configuration.
 * @return RDD of Cells.
 */
public RDD<Cells> createS3RDD(ExtractorConfig<Cells> config) {
    final Serializable bucketValue = config.getValues().get(ExtractorConstants.S3_BUCKET);
    final Serializable pathValue = config.getValues().get(ExtractorConstants.FS_FILE_PATH);
    final TextFileDataTable metadata = UtilFS.createTextFileMetaDataFromConfig(config, this);

    final String configuredPath = pathValue.toString();
    final String target;
    if (config.getExtractorImplClassName().equals(ExtractorConstants.S3)) {
        target = ExtractorConstants.S3_PREFIX + bucketValue.toString() + pathValue.toString();
    } else {
        target = configuredPath;
    }

    final Configuration conf = this.sc().hadoopConfiguration();
    conf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
    conf.set("fs.s3n.awsAccessKeyId", config.getString(ExtractorConstants.S3_ACCESS_KEY_ID));
    conf.set("fs.s3n.awsSecretAccessKey", config.getString(ExtractorConstants.S3_SECRET_ACCESS_KEY));

    return createRDDFromFilePath(target, metadata);
}
/**
 * Initializes this job configuration from a generic extractor configuration.
 * <p>
 * Delegates to the superclass first, then applies the input columns and the filter query when the
 * corresponding values are present, and finally invokes the no-argument {@code initialize()}.
 *
 * @param extractorConfig configuration whose values are read.
 * @return this instance.
 */
@Override
public ESDeepJobConfig<T> initialize(ExtractorConfig extractorConfig) {
    super.initialize(extractorConfig);

    // The values are only probed for presence, so an unbounded view is used: declaring the map as
    // Map<String, String> would be an unchecked (and incorrect) claim about the stored value types.
    Map<?, ?> values = extractorConfig.getValues();

    if (values.get(INPUT_COLUMNS) != null) {
        inputColumns(extractorConfig.getStringArray(INPUT_COLUMNS));
    }

    if (values.get(FILTER_QUERY) != null) {
        filterQuery(extractorConfig.getFilterArray(FILTER_QUERY));
    }

    this.initialize();
    return this;
}
/**
 * Checks that a cloned ExtractorConfig starts out equal to its source and stops being equal once the
 * source's values change.
 * <p>
 * Right after cloning, the implementation class name and the values of both objects must match. The map
 * originally handed to {@code setValues} is then mutated; the clone's values are expected to differ from
 * the source's afterwards (presumably because the source keeps a reference to that map while the clone
 * holds its own copy - confirm against ExtractorConfig).
 */
@Test
public void cloneObjectWithParentsTest() {
    Map<String, Serializable> map = new HashMap<>();
    map.put("key1", "val1");
    map.put("key2", "val2");
    map.put("key3", "val3");

    // Diamond instead of a raw type keeps the construction type-checked.
    ExtractorConfig<Cells> extractorConfig = new ExtractorConfig<>();
    extractorConfig.setExtractorImplClassName("testExtractor");
    extractorConfig.setValues(map);

    ExtractorConfig<Cells> clone = extractorConfig.clone();

    assertEquals(clone.getExtractorImplClassName(), extractorConfig.getExtractorImplClassName());
    assertEquals(clone.getValues(), extractorConfig.getValues());

    // Mutate the map given to the source config; the clone must not observe the change.
    map.remove("key1");

    assertEquals(map.size(), 2);
    assertNotEquals(clone.getValues(), extractorConfig.getValues());
}
super.initialize(extractorConfig); Map<String, Serializable> values = extractorConfig.getValues();
setPartitionId(extractorConfig.getPartitionId()); Map<String, Serializable> values = extractorConfig.getValues();
setPartitionId(extractorConfig.getPartitionId()); Map<String, Serializable> values = extractorConfig.getValues();