@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    // The local file system has no notion of distributed blocks: regardless of the
    // requested range, report the entire file as one block hosted on this machine.
    final BlockLocation wholeFile = new LocalBlockLocation(hostName, file.getLen());
    return new BlockLocation[] { wholeFile };
}
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { final List<FileStatus> files = this.getFiles(); final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { final FileSystem fs = file.getPath().getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += blockSize) { long remainingLength = Math.min(pos + blockSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the files %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, Arrays.toString(getFilePaths()), minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = last.getPath().getFileSystem().getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[inputSplits.size()]); }
/**
 * Polls the watched directory on a fixed interval and emits one tuple
 * (path, start offset, end offset) per file that needs (re-)processing.
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    FileSystem fileSystem = FileSystem.get(new URI(path));

    while (isRunning) {
        for (String filePath : listNewFiles(fileSystem)) {
            switch (watchType) {
                case ONLY_NEW_FILES:
                case REPROCESS_WITH_APPENDED:
                    // Emit the whole file: start at 0, -1 signals "read to end of file".
                    ctx.collect(new Tuple3<String, Long, Long>(filePath, 0L, -1L));
                    offsetOfFiles.put(filePath, -1L);
                    break;

                case PROCESS_ONLY_APPENDED:
                    // Only emit the range appended since the last poll.
                    long fileSize = fileSystem.getFileStatus(new Path(filePath)).getLen();
                    long offset = offsetOfFiles.containsKey(filePath) ? offsetOfFiles.get(filePath) : 0L;

                    ctx.collect(new Tuple3<String, Long, Long>(filePath, offset, fileSize));
                    offsetOfFiles.put(filePath, fileSize);

                    LOG.info("File processed: {}, {}, {}", filePath, offset, fileSize);
                    break;

                default:
                    break;
            }
        }

        Thread.sleep(interval);
    }
}
/**
 * Opens an Avro {@link DataFileReader} for the given split.
 *
 * <p>The datum reader is chosen by the configured value type: generic records,
 * generated specific records, or reflection-based POJOs.
 *
 * @param split the input split to open
 * @return the positioned data file reader
 * @throws IOException if the file cannot be opened or read
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;

    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
            ? new SpecificDatumReader<E>(avroValueType)
            : new ReflectDatumReader<E>(avroValueType);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());

    // FIX: parameterized cast instead of the previous raw-typed cast; openReader returns
    // a FileReader<E> for a DatumReader<E>, so the narrowing to DataFileReader<E> is safe.
    @SuppressWarnings("unchecked")
    DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
len += s.getLen();
/** Returns the length in bytes of the file at the given path. */
private long fileSize(Path path) throws IOException {
    final FileSystem fs = path.getFileSystem();
    final FileStatus status = fs.getFileStatus(path);
    return status.getLen();
}
len += s.getLen();
if(acceptFile(dir)) { files.add(dir); length += dir.getLen(); testForUnsplittable(dir); } else {
totalLength += pathFile.getLen(); for (final FileStatus file : files) { final FileSystem fs = file.getPath().getFileSystem(); final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, file.getLen()); Set<String> hosts = new HashSet<String>(); for(BlockLocation block : blocks) { hosts.addAll(Arrays.asList(block.getHosts())); long len = file.getLen(); if(testForUnsplittable(file)) { len = READ_WHOLE_SPLIT_FLAG; final long len = file.getLen(); final long blockSize = file.getBlockSize();
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
/**
 * Reads every file directly under {@code directory} and returns its full contents
 * decoded as UTF-8, keyed by file path.
 *
 * @param directory the directory whose files are read
 * @return map from file path to file contents
 * @throws Exception if listing or reading fails
 */
private Map<Path, String> getFileContentByPath(Path directory) throws Exception {
    Map<Path, String> contents = new HashMap<>();

    final FileStatus[] filesInBucket = getFileSystem().listStatus(directory);
    for (FileStatus file : filesInBucket) {
        final int fileLength = (int) file.getLen();
        byte[] serContents = new byte[fileLength];

        // BUGFIX: the stream was previously never closed (resource leak), and a single
        // read() call may legally return fewer bytes than requested — loop until the
        // buffer is full or EOF is reached.
        try (FSDataInputStream stream = getFileSystem().open(file.getPath())) {
            int totalRead = 0;
            while (totalRead < fileLength) {
                int read = stream.read(serContents, totalRead, fileLength - totalRead);
                if (read == -1) {
                    break; // premature EOF; keep whatever was read
                }
                totalRead += read;
            }
        }

        contents.put(file.getPath(), new String(serContents, StandardCharsets.UTF_8));
    }
    return contents;
}
HttpHeaders.setHeader(response, CACHE_CONTROL, "private"); HttpHeaders.setHeader(response, CONTENT_TYPE, "application/octet-stream"); HttpHeaders.setContentLength(response, status.getLen());
// Worker body for the stream-limiting test: opens a stream, checks that the file
// system never exceeds its configured open-stream caps, then blocks until the test
// driver wakes it up and issues one final read.
@Override public void go() throws Exception { try (FSDataInputStream stream = fs.open(path)) { assertTrue(fs.getNumberOfOpenInputStreams() <= maxConcurrentInputStreams); assertTrue(fs.getTotalNumberOfOpenStreams() <= maxConcurrentStreamsTotal); // read all but the last byte so the stream stays open, positioned before EOF
final byte[] readBuffer = new byte[(int) fs.getFileStatus(path).getLen() - 1]; assertTrue(stream.read(readBuffer) != -1); waitTillWokenUp(); // try to read one more byte, which might/should fail with an I/O exception
//noinspection ResultOfMethodCallIgnored
stream.read(); } } }
/** Returns the length of the underlying file in bytes, delegating to the wrapped status. */
@Override
public long getLen() {
    return this.fileStatus.getLen();
}
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    // No distributed blocks on a local file system: the whole file is one block,
    // located on this host, independent of the requested (start, len) range.
    final BlockLocation wholeFile = new LocalBlockLocation(hostName, file.getLen());
    return new BlockLocation[] { wholeFile };
}
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    // A local file is never split into distributed blocks, so the requested range
    // is ignored and the entire file is reported as a single block on this host.
    final BlockLocation wholeFile = new LocalBlockLocation(hostName, file.getLen());
    return new BlockLocation[] { wholeFile };
}
/**
 * Returns the file size in bytes.
 *
 * @return The file size in bytes.
 * @throws IOException Thrown if the file system cannot be accessed.
 */
protected long getFileSize() throws IOException {
    final FileSystem fs = getFileSystem();
    return fs.getFileStatus(filePath).getLen();
}
}