/**
 * Get the FS implementation for this table
 */
public FileSystem getFs() {
  if (fs == null) {
    fs = FSUtils.getFs(metaPath, hadoopConf.get());
  }
  return fs;
}
public static Configuration registerFileSystem(Path file, Configuration conf) {
  Configuration returnConf = new Configuration(conf);
  String scheme = FSUtils.getFs(file.toString(), conf).getScheme();
  returnConf.set("fs." + HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl",
      HoodieWrapperFileSystem.class.getName());
  return returnConf;
}
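For context, a minimal hypothetical caller combining this helper with convertToHoodiePath (shown further below). The class holding these static helpers is assumed here to be HoodieWrapperFileSystem and the enclosing method is invented for illustration; only FileSystem.get(URI, Configuration) from standard Hadoop is used besides the helpers already listed.

// Hypothetical caller: register the wrapper impl for the file's scheme, rewrite the
// path to the Hoodie scheme, and let Hadoop resolve it to HoodieWrapperFileSystem.
public static FileSystem openAsHoodieWrapper(Path dataFile, Configuration hadoopConf) throws IOException {
  Configuration registered = HoodieWrapperFileSystem.registerFileSystem(dataFile, hadoopConf);
  Path hoodiePath = HoodieWrapperFileSystem.convertToHoodiePath(dataFile, registered);
  // FileSystem.get(URI, Configuration) picks up the "fs.<hoodie-scheme>.impl" entry set above
  return FileSystem.get(hoodiePath.toUri(), registered);
}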
public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) {
  ParquetMetadata footer;
  try {
    // TODO(vc): Should we use the parallel reading version here?
    footer = ParquetFileReader
        .readFooter(getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
  }
  return footer;
}
public HiveIncrPullSource(TypedProperties props, JavaSparkContext sparkContext,
    SchemaProvider schemaProvider) {
  super(props, sparkContext, schemaProvider);
  DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(Config.ROOT_INPUT_PATH_PROP));
  this.incrPullRootPath = props.getString(Config.ROOT_INPUT_PATH_PROP);
  this.fs = FSUtils.getFs(incrPullRootPath, sparkContext.hadoopConfiguration());
}
public DFSSource(TypedProperties props, JavaSparkContext sparkContext, SchemaProvider schemaProvider) {
  super(props, sparkContext, schemaProvider);
  DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(Config.ROOT_INPUT_PATH_PROP));
  this.fs = FSUtils.getFs(props.getString(Config.ROOT_INPUT_PATH_PROP),
      sparkContext.hadoopConfiguration());
}
public static Path convertToHoodiePath(Path file, Configuration conf) {
  try {
    String scheme = FSUtils.getFs(file.toString(), conf).getScheme();
    return convertPathWithScheme(file, getHoodieScheme(scheme));
  } catch (HoodieIOException e) {
    throw e;
  }
}
public DFSSource(TypedProperties props, JavaSparkContext sparkContext, SchemaProvider schemaProvider) {
  super(props, sparkContext, schemaProvider);
  DataSourceUtils.checkRequiredProperties(props, Arrays.asList(Config.ROOT_INPUT_PATH_PROP));
  this.fs = FSUtils.getFs(props.getString(Config.ROOT_INPUT_PATH_PROP),
      sparkContext.hadoopConfiguration());
}
public static void main(String[] args) throws Exception {
  // parse the params
  final HiveSyncConfig cfg = new HiveSyncConfig();
  JCommander cmd = new JCommander(cfg, args);
  if (cfg.help || args.length == 0) {
    cmd.usage();
    System.exit(1);
  }
  FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration());
  HiveConf hiveConf = new HiveConf();
  hiveConf.addResource(fs.getConf());
  new HiveSyncTool(cfg, hiveConf, fs).syncHoodieTable();
}
public HiveIncrPullSource(TypedProperties props, JavaSparkContext sparkContext,
    SchemaProvider schemaProvider) {
  super(props, sparkContext, schemaProvider);
  DataSourceUtils.checkRequiredProperties(props, Arrays.asList(Config.ROOT_INPUT_PATH_PROP));
  this.incrPullRootPath = props.getString(Config.ROOT_INPUT_PATH_PROP);
  this.fs = FSUtils.getFs(incrPullRootPath, sparkContext.hadoopConfiguration());
}
@VisibleForTesting
HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight,
    HoodieIndex index) {
  this.fs = FSUtils.getFs(clientConfig.getBasePath(), jsc.hadoopConfiguration());
  this.jsc = jsc;
  this.config = clientConfig;
  this.index = index;
  this.metrics = new HoodieMetrics(config, config.getTableName());
  this.rollbackInFlight = rollbackInFlight;
}
public static void createCompactionRequestedFile(String basePath, String commitTime,
    Configuration configuration) throws IOException {
  Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
      + HoodieTimeline.makeRequestedCompactionFileName(commitTime));
  FileSystem fs = FSUtils.getFs(basePath, configuration);
  FSDataOutputStream os = fs.create(commitFile, true);
  os.close();
}
public static final void createInflightCleanFiles(String basePath, Configuration configuration,
    String... commitTimes) throws IOException {
  for (String commitTime : commitTimes) {
    Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
        + HoodieTimeline.makeInflightCleanerFileName(commitTime));
    FileSystem fs = FSUtils.getFs(basePath, configuration);
    // Create an empty marker file and close the stream so the handle is not leaked
    FSDataOutputStream os = fs.create(commitFile, true);
    os.close();
  }
}
private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath,
    String repairedOutputPath, String basePath) throws Exception {
  DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath,
      new SQLContext(jsc), FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
  job.fixDuplicates(true);
  return 0;
}
public static void createCommitFile(String basePath, String commitTime, Configuration configuration)
    throws IOException {
  Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
      + HoodieTimeline.makeCommitFileName(commitTime));
  FileSystem fs = FSUtils.getFs(basePath, configuration);
  FSDataOutputStream os = fs.create(commitFile, true);
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  try {
    // Write empty commit metadata
    os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
  } finally {
    os.close();
  }
}
@Before public void init() throws IOException { // Initialize a local spark env jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieBloomIndex")); // Create a temp folder as the base path TemporaryFolder folder = new TemporaryFolder(); folder.create(); basePath = folder.getRoot().getAbsolutePath(); fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath); // We have some records to be tagged (two different partitions) schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr)); }
@Before public void init() throws IOException { // Initialize a local spark env jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieGlobalBloomIndex")); // Create a temp folder as the base path TemporaryFolder folder = new TemporaryFolder(); folder.create(); basePath = folder.getRoot().getAbsolutePath(); fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath); // We have some records to be tagged (two different partitions) schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr)); }
@Before public void init() throws IOException { // Initialize a local spark env jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieCompactor")); // Create a temp folder as the base path TemporaryFolder folder = new TemporaryFolder(); folder.create(); basePath = folder.getRoot().getAbsolutePath(); hadoopConf = HoodieTestUtils.getDefaultHadoopConf(); fs = FSUtils.getFs(basePath, hadoopConf); HoodieTestUtils.initTableType(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ); dataGen = new HoodieTestDataGenerator(); compactor = new HoodieRealtimeTableCompactor(); }