format.setFilePath("file:///some/file/that/will/not/be/read"); 0, tempFile.length(), new String[]{ "localhost" }); format.setFieldDelimiter(delimiter); format.setFieldTypesGeneric(String.class, String.class, String.class); format.setCharset(charset); format.configure(new Configuration()); format.open(split); values = format.nextRecord(values); assertNull(format.nextRecord(values)); assertTrue(format.reachedEnd()); format.close();
return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset())); return false; } else { String lineAsString = new String(bytes, offset, numBytes, getCharset()); throw new ParseException("Line could not be parsed: '" + lineAsString + "'\n" + "ParserError " + parser.getErrorState() + " \n" + "Expect field types: "+fieldTypesToString() + " \n" + "in file: " + currentSplit.getPath()); startPos = skipFields(bytes, startPos, limit, this.fieldDelim); if (startPos < 0) { if (!lenient) { String lineAsString = new String(bytes, offset, numBytes, getCharset()); throw new ParseException("Line could not be parsed: '" + lineAsString+"'\n" + "Expect field types: "+fieldTypesToString()+" \n" + "in file: " + currentSplit.getPath()); } else {
/**
 * Returns the field types of this format.
 *
 * <p>This is a thin delegate to {@code super.getGenericFieldTypes()}.
 *
 * @return the array returned by the superclass
 */
public Class<?>[] getFieldTypes() {
	final Class<?>[] genericTypes = super.getGenericFieldTypes();
	return genericTypes;
}
p.setCharset(getCharset()); if (this.quotedStringParsing) { if (p instanceof StringParser) { readLine(); // read and ignore
/**
 * Sets the comment prefix.
 *
 * <p>A non-null prefix is stored both as the given string and as its bytes, encoded
 * with the charset returned by {@code getCharset()}. A {@code null} prefix clears the
 * byte form and stores {@code null} as the string form.
 *
 * @param commentPrefix the prefix string, or {@code null}
 */
public void setCommentPrefix(String commentPrefix) {
	// Encode first so that nothing is assigned if the encoding step fails.
	final byte[] encodedPrefix =
		(commentPrefix == null) ? null : commentPrefix.getBytes(getCharset());

	this.commentPrefix = encodedPrefix;
	this.commentPrefixString = commentPrefix;
}
/**
 * Reads the next record, retrying while the superclass yields {@code null}.
 *
 * <p>{@code super.nextRecord(record)} is called at least once and then again as long as
 * it returns {@code null} and {@code reachedEnd()} is still {@code false}.
 *
 * @param record the object handed to the superclass for each read attempt
 * @return the first non-null record returned by the superclass, or {@code null} if the
 *         end was reached without obtaining one
 * @throws IOException if the superclass read throws it
 */
@Override
public OUT nextRecord(OUT record) throws IOException {
	OUT current = super.nextRecord(record);
	while (current == null && !reachedEnd()) {
		current = super.nextRecord(record);
	}
	return current;
}
@Override public void open(FileInputSplit split) throws IOException { super.open(split); @SuppressWarnings("unchecked") FieldParser<Object>[] fieldParsers = (FieldParser<Object>[]) getFieldParsers(); //throw exception if no field parsers are available if (fieldParsers.length == 0) { throw new IOException("CsvInputFormat.open(FileInputSplit split) - no field parsers to parse input"); } // create the value holders this.parsedValues = new Object[fieldParsers.length]; for (int i = 0; i < fieldParsers.length; i++) { this.parsedValues[i] = fieldParsers[i].createValue(); } // left to right evaluation makes access [0] okay // this marker is used to fasten up readRecord, so that it doesn't have to check each call if the line ending is set to default if (this.getDelimiter().length == 1 && this.getDelimiter()[0] == '\n') { this.lineDelimiterIsLinebreak = true; } this.commentCount = 0; this.invalidLineCount = 0; }
protected void setFieldsGeneric(int[] sourceFieldIndices, Class<?>[] fieldTypes) { checkNotNull(sourceFieldIndices); checkNotNull(fieldTypes); checkArgument(sourceFieldIndices.length == fieldTypes.length, "Number of field indices and field types must match."); for (int i : sourceFieldIndices) { if (i < 0) { throw new IllegalArgumentException("Field indices must not be smaller than zero."); } } int largestFieldIndex = max(sourceFieldIndices); this.fieldIncluded = new boolean[largestFieldIndex + 1]; ArrayList<Class<?>> types = new ArrayList<Class<?>>(); // check if we support parsers for these types for (int i = 0; i < fieldTypes.length; i++) { Class<?> type = fieldTypes[i]; if (type != null) { if (FieldParser.getParserForType(type) == null) { throw new IllegalArgumentException("The type '" + type.getName() + "' is not supported for the CSV input format."); } types.add(type); fieldIncluded[sourceFieldIndices[i]] = true; } } this.fieldTypes = types.toArray(new Class<?>[types.size()]); }
@Override public void open(FileInputSplit split) throws IOException { super.open(split); // instantiate the parsers FieldParser<?>[] parsers = new FieldParser<?>[fieldTypes.length]; for (int i = 0; i < fieldTypes.length; i++) { if (fieldTypes[i] != null) { Class<? extends FieldParser<?>> parserType = FieldParser.getParserForType(fieldTypes[i]); if (parserType == null) { throw new RuntimeException("No parser available for type '" + fieldTypes[i].getName() + "'."); } FieldParser<?> p = InstantiationUtil.instantiate(parserType, FieldParser.class); p.setCharset(getCharset()); if (this.quotedStringParsing) { if (p instanceof StringParser) { ((StringParser)p).enableQuotedStringParsing(this.quoteCharacter); } else if (p instanceof StringValueParser) { ((StringValueParser)p).enableQuotedStringParsing(this.quoteCharacter); } } parsers[i] = p; } } this.fieldParsers = parsers; // skip the first line, if we are at the beginning of a file and have the option set if (this.skipFirstLineAsHeader && this.splitStart == 0) { readLine(); // read and ignore } }
/**
 * Sets the field delimiter.
 *
 * <p>The delimiter is stored both as the given string and as its bytes, encoded with the
 * charset returned by {@code getCharset()}.
 *
 * @param delimiter the delimiter string; must not be {@code null}
 * @throws IllegalArgumentException if {@code delimiter} is {@code null}
 */
public void setFieldDelimiter(String delimiter) {
	if (null == delimiter) {
		throw new IllegalArgumentException("Delimiter must not be null");
	}

	// Encode first so that nothing is assigned if the encoding step fails.
	final byte[] encodedDelimiter = delimiter.getBytes(getCharset());

	this.fieldDelim = encodedDelimiter;
	this.fieldDelimString = delimiter;
}
/**
 * Reads the next record, retrying while the superclass yields {@code null}.
 *
 * <p>{@code super.nextRecord(record)} is called at least once and then again as long as
 * it returns {@code null} and {@code reachedEnd()} is still {@code false}.
 *
 * @param record the object handed to the superclass for each read attempt
 * @return the first non-null record returned by the superclass, or {@code null} if the
 *         end was reached without obtaining one
 * @throws IOException if the superclass read throws it
 */
@Override
public OUT nextRecord(OUT record) throws IOException {
	OUT current = super.nextRecord(record);
	while (current == null && !reachedEnd()) {
		current = super.nextRecord(record);
	}
	return current;
}
@Override public void open(FileInputSplit split) throws IOException { super.open(split); @SuppressWarnings("unchecked") FieldParser<Object>[] fieldParsers = (FieldParser<Object>[]) getFieldParsers(); //throw exception if no field parsers are available if (fieldParsers.length == 0) { throw new IOException("CsvInputFormat.open(FileInputSplit split) - no field parsers to parse input"); } // create the value holders this.parsedValues = new Object[fieldParsers.length]; for (int i = 0; i < fieldParsers.length; i++) { this.parsedValues[i] = fieldParsers[i].createValue(); } // left to right evaluation makes access [0] okay // this marker is used to fasten up readRecord, so that it doesn't have to check each call if the line ending is set to default if (this.getDelimiter().length == 1 && this.getDelimiter()[0] == '\n') { this.lineDelimiterIsLinebreak = true; } this.commentCount = 0; this.invalidLineCount = 0; }
protected void setFieldsGeneric(int[] sourceFieldIndices, Class<?>[] fieldTypes) { checkNotNull(sourceFieldIndices); checkNotNull(fieldTypes); checkArgument(sourceFieldIndices.length == fieldTypes.length, "Number of field indices and field types must match."); for (int i : sourceFieldIndices) { if (i < 0) { throw new IllegalArgumentException("Field indices must not be smaller than zero."); } } int largestFieldIndex = max(sourceFieldIndices); this.fieldIncluded = new boolean[largestFieldIndex + 1]; ArrayList<Class<?>> types = new ArrayList<Class<?>>(); // check if we support parsers for these types for (int i = 0; i < fieldTypes.length; i++) { Class<?> type = fieldTypes[i]; if (type != null) { if (FieldParser.getParserForType(type) == null) { throw new IllegalArgumentException("The type '" + type.getName() + "' is not supported for the CSV input format."); } types.add(type); fieldIncluded[sourceFieldIndices[i]] = true; } } this.fieldTypes = types.toArray(new Class<?>[types.size()]); }
return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset())); return false; } else { String lineAsString = new String(bytes, offset, numBytes, getCharset()); throw new ParseException("Line could not be parsed: '" + lineAsString + "'\n" + "ParserError " + parser.getErrorState() + " \n" + "Expect field types: "+fieldTypesToString() + " \n" + "in file: " + filePath); startPos = skipFields(bytes, startPos, limit, this.fieldDelim); if (startPos < 0) { if (!lenient) { String lineAsString = new String(bytes, offset, numBytes, getCharset()); throw new ParseException("Line could not be parsed: '" + lineAsString+"'\n" + "Expect field types: "+fieldTypesToString()+" \n" + "in file: "+filePath); } else {
p.setCharset(getCharset()); if (this.quotedStringParsing) { if (p instanceof StringParser) { readLine(); // read and ignore
/**
 * Sets the charset and re-encodes the delimiter and comment prefix with it.
 *
 * <p>After delegating to {@code super.setCharset(charset)}, the byte forms of the field
 * delimiter and of the comment prefix are rebuilt from their stored string forms using
 * {@code getCharset()}. A string form that is {@code null} leaves its byte form untouched.
 *
 * @param charset the charset name passed on to the superclass
 */
@Override
public void setCharset(String charset) {
	super.setCharset(charset);

	final String delimiterText = this.fieldDelimString;
	if (delimiterText != null) {
		this.fieldDelim = delimiterText.getBytes(getCharset());
	}

	final String prefixText = this.commentPrefixString;
	if (prefixText != null) {
		this.commentPrefix = prefixText.getBytes(getCharset());
	}
}
/**
 * Reads the next record, retrying while the superclass yields {@code null}.
 *
 * <p>{@code super.nextRecord(record)} is called at least once and then again as long as
 * it returns {@code null} and {@code reachedEnd()} is still {@code false}.
 *
 * @param record the object handed to the superclass for each read attempt
 * @return the first non-null record returned by the superclass, or {@code null} if the
 *         end was reached without obtaining one
 * @throws IOException if the superclass read throws it
 */
@Override
public OUT nextRecord(OUT record) throws IOException {
	OUT current = super.nextRecord(record);
	while (current == null && !reachedEnd()) {
		current = super.nextRecord(record);
	}
	return current;
}
@Override public void open(FileInputSplit split) throws IOException { super.open(split); @SuppressWarnings("unchecked") FieldParser<Object>[] fieldParsers = (FieldParser<Object>[]) getFieldParsers(); //throw exception if no field parsers are available if (fieldParsers.length == 0) { throw new IOException("CsvInputFormat.open(FileInputSplit split) - no field parsers to parse input"); } // create the value holders this.parsedValues = new Object[fieldParsers.length]; for (int i = 0; i < fieldParsers.length; i++) { this.parsedValues[i] = fieldParsers[i].createValue(); } // left to right evaluation makes access [0] okay // this marker is used to fasten up readRecord, so that it doesn't have to check each call if the line ending is set to default if (this.getDelimiter().length == 1 && this.getDelimiter()[0] == '\n') { this.lineDelimiterIsLinebreak = true; } this.commentCount = 0; this.invalidLineCount = 0; }
/**
 * Returns the field types of this format.
 *
 * <p>This is a thin delegate to {@code super.getGenericFieldTypes()}.
 *
 * @return the array returned by the superclass
 */
public Class<?>[] getFieldTypes() {
	final Class<?>[] genericTypes = super.getGenericFieldTypes();
	return genericTypes;
}
protected void setFieldsGeneric(int[] sourceFieldIndices, Class<?>[] fieldTypes) { checkNotNull(sourceFieldIndices); checkNotNull(fieldTypes); checkArgument(sourceFieldIndices.length == fieldTypes.length, "Number of field indices and field types must match."); for (int i : sourceFieldIndices) { if (i < 0) { throw new IllegalArgumentException("Field indices must not be smaller than zero."); } } int largestFieldIndex = max(sourceFieldIndices); this.fieldIncluded = new boolean[largestFieldIndex + 1]; ArrayList<Class<?>> types = new ArrayList<Class<?>>(); // check if we support parsers for these types for (int i = 0; i < fieldTypes.length; i++) { Class<?> type = fieldTypes[i]; if (type != null) { if (FieldParser.getParserForType(type) == null) { throw new IllegalArgumentException("The type '" + type.getName() + "' is not supported for the CSV input format."); } types.add(type); fieldIncluded[sourceFieldIndices[i]] = true; } } this.fieldTypes = types.toArray(new Class<?>[types.size()]); }