Refine search
/**
 * Builds the input splits for the job, one split per line of every listed input file.
 *
 * <p>Each split is a {@link FileSplit} over the input file itself, covering the byte
 * range occupied by one line; its locality hosts are whatever
 * {@code getStoreDirHosts} reports for that file.
 * (Presumably each line names a store directory to process; confirm against the
 * code that writes these input files.)
 *
 * @param job the job context supplying the input paths and configuration
 * @return the splits, in file order and then line order
 * @throws IOException if an input file cannot be listed, opened or read
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> result = new ArrayList<>();
  final List<FileStatus> inputs = listStatus(job);
  final Text lineBuffer = new Text();

  for (FileStatus status : inputs) {
    final Path inputPath = status.getPath();
    final FileSystem fileSystem = inputPath.getFileSystem(job.getConfiguration());
    final LineReader lines = new LineReader(fileSystem.open(inputPath));
    try {
      // readLine returns the number of bytes consumed (terminator included); 0 means EOF.
      long offset = 0;
      for (int consumed = lines.readLine(lineBuffer);
          consumed > 0;
          consumed = lines.readLine(lineBuffer)) {
        final String[] localityHosts = getStoreDirHosts(fileSystem, inputPath);
        result.add(new FileSplit(inputPath, offset, consumed, localityHosts));
        offset += consumed;
      }
    } finally {
      lines.close();
    }
  }

  return result;
}
str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization long bytesConsumed = 0; if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0); if (bufferLength <= 0) { if (ambiguousByteCount > 0) { str.append(recordDelimiterBytes, 0, ambiguousByteCount); bytesConsumed += ambiguousByteCount; if (appendLength >= 0 && ambiguousByteCount > 0) { str.append(recordDelimiterBytes, 0, ambiguousByteCount); ambiguousByteCount = 0; unsetNeedAdditionalRecordAfterSplit();
in = fs.open(masterIndexPath); FileStatus masterStat = fs.getFileStatus(masterIndexPath); masterIndexTimestamp = masterStat.getModificationTime(); lin = new LineReader(in, getConf()); read = lin.readLine(line); while(read < masterStat.getLen()) { int b = lin.readLine(line); read += b; readStr = line.toString().split(" "); stores.add(new Store(Long.parseLong(readStr[2]), Long.parseLong(readStr[3]))); FSDataInputStream aIn = fs.open(archiveIndexPath); archiveIndexTimestamp = archiveStat.getModificationTime(); LineReader aLin; read = 0; aIn.seek(s.begin); aLin = new LineReader(aIn, getConf()); while (read + s.begin < s.end) { int tmp = aLin.readLine(line); parsed[0] = decodeFileName(parsed[0]); archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
FileSystem inFs = inPath.getFileSystem(job); if (!inFs.getFileStatus(inPath).isDir()) return; Path gindex_path = new Path(inPath, "_master.heap"); if (inFs.exists(gindex_path)) return; PrintStream gout = new PrintStream(inFs.create(gindex_path, false)); FileSystem outFs = outPath.getFileSystem(job); FileStatus[] results = outFs.listStatus(outPath); for (FileStatus fileStatus : results) { if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) { LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath())); Text text = new Text(); while (lineReader.readLine(text) > 0) { gout.println(text); lineReader.close();
FileSystem fs = file.getFileSystem(job); fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { fileIn.seek(start); in = new LineReader(fileIn, job); start += in.readLine(new Text(), 0, (int)Math.min((long)Integer.MAX_VALUE, end - start));
FileSystem outFs = outPath.getFileSystem(conf); FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() { @Override public boolean accept(Path path) { final byte[] NewLine = new byte[] {'\n'}; for (FileStatus f : resultFiles) { LineReader in = new LineReader(outFs.open(f.getPath())); while (in.readLine(tempLine) > 0) { destOut.write(tempLine.getBytes(), 0, tempLine.getLength()); destOut.write(NewLine); tempPartition.fromText(tempLine); wktOut.println(tempPartition.toWKT()); in.close(); outFs.delete(f.getPath(), false); // Delete the copied file
final int flen = (int) infile.getLen(); final Text line = new Text(); final Decompressor dcmp = CodecPool.getDecompressor(codec); try { codec.createInputStream(fs.open(infile.getPath()), dcmp, pos, flen, SplittableCompressionCodec.READ_MODE.BYBLOCK); if (in.getAdjustedStart() >= flen) { final LineReader lreader = new LineReader(in); lreader.readLine(line); // ignore; likely partial if (in.getPos() >= flen) { break; lreader.readLine(line); final int seq1 = readLeadingInt(line); lreader.readLine(line); if (in.getPos() >= flen) { break; fs.delete(infile.getPath().getParent(), true);
super.commitJob(context); FileSystem fs = outPath.getFileSystem(context.getConfiguration()); FileStatus[] masterFiles = fs.listStatus(outPath, new PathFilter() { @Override public boolean accept(Path p) { Path mergedFilePath = new Path(outPath, "_master.heap"); PrintStream mergedFile = new PrintStream(fs.create(mergedFilePath)); Path htmlPath = new Path(outPath, "_master.html"); PrintStream htmlFile = new PrintStream(fs.create(htmlPath)); htmlFile.print("<html> <body>"); Text line = new Text2(); for (FileStatus masterFile : masterFiles) { LineReader reader = new LineReader(fs.open(masterFile.getPath())); while (reader.readLine(line) > 0) { mergedFile.println(line); String[] parts = line.toString().split(","); htmlFile.printf("<img src='%s' style='position: absolute; left: %s; top: %s; border: dotted 1px black;'/>\n", parts[2], parts[0], parts[1]); reader.close(); fs.delete(masterFile.getPath(), false);
/**
 * Creates a mappings file from the contents of a flat text file containing docid to docno
 * mappings. This method is used by {@link WikipediaDocnoMappingBuilder} internally.
 *
 * <p>The output consists of {@code n} written as an int, followed by one int per input
 * line: the first tab-separated field of that line parsed as an integer.
 *
 * @param fs file system used to open the input file and create the output file
 * @param inputFile flat text file containing docid to docno mappings
 * @param n number of lines to read from {@code inputFile}
 * @param outputFile output mappings file; overwritten if it already exists
 * @throws IOException if reading or writing fails, or if {@code inputFile} contains fewer
 *     than {@code n} lines
 */
static public void writeDocnoMappingData(FileSystem fs, String inputFile, int n,
    String outputFile) throws IOException {
  LOG.info("Writing " + n + " docids to " + outputFile);

  LineReader reader = new LineReader(fs.open(new Path(inputFile)));
  try {
    FSDataOutputStream out = fs.create(new Path(outputFile), true);
    try {
      Text line = new Text();
      out.writeInt(n);
      for (int i = 0; i < n; i++) {
        // readLine returns 0 only at end of stream (an empty line still consumes its
        // terminator), so 0 here means the input is shorter than the caller promised.
        if (reader.readLine(line) == 0) {
          throw new IOException("Expected " + n + " lines in " + inputFile
              + " but reached end of file after " + i);
        }

        String[] arr = line.toString().split("\\t");
        out.writeInt(Integer.parseInt(arr[0]));

        int written = i + 1;
        if (written % 100000 == 0) {
          LOG.info(written + " articles");
        }
      }
    } finally {
      // Close on every path so a malformed line does not leak the output stream.
      out.close();
    }
  } finally {
    reader.close();
  }

  LOG.info("Done!");
}
/** * Create a WKT file form an existing master file to use with GIS software. * @param fs * @param masterPath */ public static void generateMasterWKT(FileSystem fs, Path masterPath) throws IOException { // Write the WKT-formatted master file String name = masterPath.getName(); int lastDot = name.lastIndexOf('.'); String globalIndexExtension = name.substring(lastDot + 1); Path wktPath = new Path(masterPath.getParent(), "_"+globalIndexExtension+".wkt"); PrintStream wktOut = new PrintStream(fs.create(wktPath)); wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name"); Text tempLine = new Text2(); Partition tempPartition = new Partition(); LineReader in = new LineReader(fs.open(masterPath)); while (in.readLine(tempLine) > 0) { tempPartition.fromText(tempLine); wktOut.println(tempPartition.toWKT()); } in.close(); wktOut.close(); } }
/**
 * Loads the mapping from a comma-separated text file.
 *
 * <p>For the line at zero-based index {@code i}: its fourth field is parsed as an int
 * and stored in {@code offets[i]}, and whenever its first field differs from the
 * previous line's first field, {@code subdirMapping} maps that first field to {@code i}.
 *
 * @param p path of the mapping file
 * @param fs file system used to open {@code p}
 * @throws IOException if the file cannot be opened or read
 */
@Override
public void loadMapping(Path p, FileSystem fs) throws IOException {
  LineReader reader = new LineReader(fs.open(p));
  try {
    Text t = new Text();
    int cnt = 0;
    String prevSec = null;
    while (reader.readLine(t) > 0) {
      String[] arr = t.toString().split(",");
      // equals(null) is false, so this also covers the very first line.
      if (!arr[0].equals(prevSec)) {
        subdirMapping.put(arr[0], cnt);
      }
      offets[cnt] = Integer.parseInt(arr[3]);
      prevSec = arr[0];
      cnt++;
    }
  } finally {
    // Close even when a malformed line makes parsing throw.
    reader.close();
  }
}
int numberOfLinesPerSplit) throws IOException { List<FileSplit> splits = new ArrayList<FileSplit> (); Path fileName = status.getPath(); if (status.isDir()) { throw new IOException("Not a file: " + fileName); FileSystem fs = fileName.getFileSystem(conf); LineReader lr = null; try { FSDataInputStream in = fs.open(fileName); lr = new LineReader(in, conf); Text line = new Text(); long begin = 0; long length = 0; long recordLength = 0; int recordsRead = 0; while ((num = lr.readLine(line)) > 0) { if (line.toString().indexOf(">") >= 0){ recordsRead++;
end = start + split.getLength(); final Path file = split.getPath(); long totalLength = file.getFileSystem(context.getConfiguration()).getFileStatus(file).getLen(); compressionCodecs = new CompressionCodecFactory(config); final CompressionCodec codec = compressionCodecs.getCodec(file); FileSystem fs = file.getFileSystem(config); FSDataInputStream fileIn = fs.open(file); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), config); estLength = end; end = Long.MAX_VALUE; fileIn.seek(start); in = new LineReader(fileIn, config); start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
/**
 * Sets up the line reader over the record input and builds the W3C log parser from the
 * field definition file.
 *
 * @param input stream of log records, wrapped in a {@link LineReader}
 * @param conf configuration used for the line reader and to resolve the file system of
 *     the field definition file
 * @throws IOException if the field definition file cannot be opened or read
 */
@Override
protected void createInputReader(InputStream input, Configuration conf) throws IOException {
  in_ = new LineReader(input, conf);

  String fileURI = getFieldDefinitionFile();
  Path path = new Path(fileURI);
  InputStream is = path.getFileSystem(conf).open(path);
  try {
    w3cLogParser_ = new W3CLogParser(is);
  } finally {
    // The parser is handed the stream only for construction; always release it,
    // including when the constructor throws.
    is.close();
  }
}
/**
 * Creates a record reader for one file of a combined split.
 *
 * @param split the combined split this reader belongs to
 * @param context task context supplying the configuration
 * @param index position within {@code split} of the file this reader handles
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 */
public CustomRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  // Byte range assigned to this reader within its file.
  this.startOffset = split.getOffset(index);
  this.endOffset = this.startOffset + split.getLength(index);
  this.pos = this.startOffset;

  // Open the file and wrap it for line-by-line reading.
  this.path = split.getPath(index);
  this.fs = this.path.getFileSystem(context.getConfiguration());
  this.fileIn = this.fs.open(this.path);
  // NOTE(review): the stream is not seeked to startOffset, so reading always begins at
  // byte 0 of the file even though pos starts at startOffset. This is only consistent
  // if every chunk starts at offset 0 (e.g. non-splittable input) - confirm.
  this.reader = new LineReader(this.fileIn);
}
/**
 * Reads every partition listed in a master file.
 *
 * <p>Each line of the file is parsed into its own {@link Partition} via
 * {@code fromText}; partitions are returned in file order.
 *
 * @param masterPath path of the master file; resolved against the file system that owns
 *     the path, using a default {@link Configuration}
 * @return the partitions parsed from the file, empty if the file has no lines
 * @throws IOException if the file cannot be opened or read
 */
public static ArrayList<Partition> getPartitions(Path masterPath) throws IOException {
  ArrayList<Partition> partitions = new ArrayList<Partition>();
  Configuration conf = new Configuration();
  // Ask the path for its own file system so fully-qualified paths on a non-default
  // file system work as well; unqualified paths still resolve to the default one.
  FileSystem fs = masterPath.getFileSystem(conf);
  Text tempLine = new Text2();
  LineReader in = new LineReader(fs.open(masterPath));
  try {
    while (in.readLine(tempLine) > 0) {
      Partition tempPartition = new Partition();
      tempPartition.fromText(tempLine);
      partitions.add(tempPartition);
    }
  } finally {
    // Previously the reader was never closed (hidden by a resource-warning suppression).
    in.close();
  }
  return partitions;
}
@Override public boolean nextKeyValue() throws IOException { if (key == null) { // key.filename = path.getName() // key.offset = pos key = new PairOfStringLong(path.getName(), pos); } if (value == null){ value = new Text(); } int newSize = 0; if (pos < endOffset) { newSize = reader.readLine(value); pos += newSize; } if (newSize == 0) { key = null; value = null; return false; } else{ return true; } } }
value = new Text(); Text line = new Text(); int recordsRead = 0; while (pos < end) { key.set(pos); newSize = in.readLine(line, maxLineLength,Math.max((int)Math.min(Integer.MAX_VALUE, end-pos),maxLineLength)); if(line.toString().indexOf(">") >= 0){ if(recordsRead > 9){//10 fasta records each time value.set(text.toString()); fileIn.seek(pos); in = new LineReader(fileIn, job); return true;
/**
 * Advances to the next line of the file.
 *
 * <p>On success the key holds the file path string and the byte offset at which the line
 * starts, and the value holds the line text. When no more data is available both key and
 * value are reset to {@code null}.
 *
 * @return {@code true} if a line was read, {@code false} once {@code pos} has reached
 *     {@code end} or the underlying reader is at end of stream
 * @throws IOException if reading from the underlying stream fails
 * @throws InterruptedException declared by the overridden method; not thrown here
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  // key is set to null below at end of input (and may never have been initialised), so
  // guard against null before touching its fields instead of throwing a
  // NullPointerException.
  if (key == null || key.fileName == null) {
    key = new CombineFileWritable();
    key.fileName = dPath.toString();
  }
  key.offset = pos;
  if (value == null) {
    value = new Text();
  }

  int newSize = 0;
  if (pos < end) {
    newSize = reader.readLine(value);
    pos += newSize;
  }

  if (newSize == 0) {
    // Nothing consumed: end of this chunk.
    key = null;
    value = null;
    return false;
  }
  return true;
}