/**
 * Writes every row added so far to the given output stream by delegating
 * to the {@code writer} field.
 *
 * @param out the stream that receives the output
 * @throws IOException if the delegate fails while writing
 */
public void writeTo(OutputStream out) throws IOException {
  this.writer.writeTo(out);
}
/**
 * Creates a writer for values described by the given schema.
 *
 * <p>The schema's string form is stored in {@code meta} under
 * {@code SCHEMA_KEY}, and the column layout derived from the schema by an
 * {@code AvroColumnator} is handed to a new {@code ColumnFileWriter}.
 *
 * @param s     the schema of the values to be written
 * @param meta  file metadata; modified to carry the schema
 * @param model the data model retained by this writer
 * @throws IOException if the underlying column file writer cannot be created
 */
public AvroColumnWriter(Schema s, ColumnFileMetaData meta, GenericData model)
    throws IOException {
  this.model = model;
  this.schema = s;

  final AvroColumnator shredder = new AvroColumnator(s);

  // Save the schema in the file metadata before the writer is built from it.
  meta.set(SCHEMA_KEY, s.toString());

  this.writer = new ColumnFileWriter(meta, shredder.getColumns());
  this.arrayWidths = shredder.getArrayWidths();
}
/**
 * Adds a row to the file, taking the value for column {@code i} from
 * {@code row[i]}.
 *
 * <p>Exactly {@code columnCount} entries are read from {@code row}; any
 * further entries are not looked at.
 *
 * @param row the values of the row, one per column, in column order
 * @throws IOException if writing a value fails
 */
public void writeRow(Object... row) throws IOException {
  startRow();
  int column = 0;
  while (column < columnCount) {
    writeValue(row[column], column);
    column++;
  }
  endRow();
}
/**
 * Adds a row to the file built from the given value.
 *
 * <p>The value is written against the writer's schema starting at column 0,
 * bracketed by the underlying writer's row start and row end calls.
 *
 * @param value the value to write as one row
 * @throws IOException if the underlying writer fails
 */
public void write(D value) throws IOException {
  writer.startRow();
  final int result = write(value, schema, 0);
  // Sanity check (only active with -ea): the recursive write must report
  // the writer's full column count.
  assert result == writer.getColumnCount();
  writer.endRow();
}
case MAP: Map<?,?> map = (Map)o; writer.writeLength(map.size(), column); for (Map.Entry e : map.entrySet()) { writer.writeValue(null, column); writer.writeValue(e.getKey(), column+1); int c = write(e.getValue(), s.getValueType(), column+2); assert(c == column+arrayWidths[column]); case ARRAY: Collection elements = (Collection)o; writer.writeLength(elements.size(), column); if (isSimple(s.getElementType())) { // optimize simple arrays for (Object element : elements) writer.writeValue(null, column); int c = write(element, s.getElementType(), column+1); assert(c == column+arrayWidths[column]); if (branch.getType() == Schema.Type.NULL) continue; if (!selected) { writer.writeLength(0, column); column+=arrayWidths[column]; } else { writer.writeLength(1, column); if (isSimple(branch)) { writeValue(o, branch, column++); } else { writer.writeValue(null, column);
new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.LONG)); Random random = TestUtil.createRandom(); for (int i = 0; i < COUNT; i++) { long l = random.nextLong(); out.writeRow(l); if (seekRowMap.containsKey(i)) seekValues[seekRowMap.get(i)] = l; out.writeTo(FILE);
/**
 * Writes a file with no columns and no rows, then checks that a reader
 * reports zero rows and zero columns.
 */
@Test public void testEmptyFile() throws Exception {
  FILE.delete();
  ColumnFileWriter out = new ColumnFileWriter(createFileMeta());
  out.writeTo(FILE);
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(0, in.getRowCount());
    Assert.assertEquals(0, in.getColumnCount());
  } finally {
    // Close the reader even when an assertion above fails, so a failing
    // test does not leave the file open.
    in.close();
  }
}
private void writeValue(Object value, Schema s, int column) throws IOException { switch (s.getType()) { case STRING: if (value instanceof Utf8) // convert Utf8 to String value = value.toString(); break; case ENUM: if (value instanceof Enum) value = ((Enum)value).ordinal(); else value = s.getEnumOrdinal(value.toString()); break; case FIXED: value = ((GenericFixed)value).bytes(); break; } writer.writeValue(value, column); }
private void writeHeader(OutputStream out) throws IOException { OutputBuffer header = new OutputBuffer(); header.write(MAGIC); // magic header.writeFixed64(rowCount); // row count header.writeFixed32(columnCount); // column count metaData.write(header); // file metadata for (ColumnOutputBuffer column : columns) column.getMeta().write(header); // column metadata for (long start : computeStarts(header.size())) header.writeFixed64(start); // column starts header.writeTo(out); }
/** Construct given metadata for each column in the file. */ public ColumnFileWriter(ColumnFileMetaData fileMeta, ColumnMetaData... columnMeta) throws IOException { checkColumns(columnMeta); this.metaData = fileMeta; this.columnCount = columnMeta.length; this.columns = new ColumnOutputBuffer[columnCount]; for (int i = 0; i < columnCount; i++) { ColumnMetaData c = columnMeta[i]; c.setDefaults(metaData); columns[i] = c.isArray() ? new ArrayColumnOutputBuffer(this, c) : new ColumnOutputBuffer(this, c); size += OutputBuffer.BLOCK_SIZE; // over-estimate } }
new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.STRING) .hasIndexValues(true)); out.writeRow(values[i]); if (seekRowMap.containsKey(i)) seekValues[seekRowMap.get(i)] = values[i]; out.writeTo(FILE);
/**
 * Writes a file with a single INT column and no rows, then checks that a
 * reader reports zero rows, one column, and no values for that column.
 */
@Test public void testEmptyColumn() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
    new ColumnFileWriter(createFileMeta(),
                         new ColumnMetaData("test", ValueType.INT));
  out.writeTo(FILE);
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(0, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    ColumnValues<Integer> values = in.getValues("test");
    for (Integer unexpected : values) {
      // Report an unexpected value as a test failure rather than by
      // throwing a raw Exception.
      Assert.fail("no value should be found");
    }
  } finally {
    // Close the reader even when an assertion above fails.
    in.close();
  }
}
/**
 * Adds a row to the file built from the given value.
 *
 * <p>The value is written against the writer's schema starting at column 0,
 * bracketed by the underlying writer's row start and row end calls.
 *
 * @param value the value to write as one row
 * @throws IOException if the underlying writer fails
 */
public void write(D value) throws IOException {
  writer.startRow();
  final int result = write(value, schema, 0);
  // Sanity check (only active with -ea): the recursive write must report
  // the writer's full column count.
  assert result == writer.getColumnCount();
  writer.endRow();
}
case MAP: Map<?,?> map = (Map)o; writer.writeLength(map.size(), column); for (Map.Entry e : map.entrySet()) { writer.writeValue(null, column); writer.writeValue(e.getKey(), column+1); int c = write(e.getValue(), s.getValueType(), column+2); assert(c == column+arrayWidths[column]); case ARRAY: Collection elements = (Collection)o; writer.writeLength(elements.size(), column); if (isSimple(s.getElementType())) { // optimize simple arrays for (Object element : elements) writer.writeValue(null, column); int c = write(element, s.getElementType(), column+1); assert(c == column+arrayWidths[column]); if (branch.getType() == Schema.Type.NULL) continue; if (!selected) { writer.writeLength(0, column); column+=arrayWidths[column]; } else { writer.writeLength(1, column); if (isSimple(branch)) { writeValue(o, branch, column++); } else { writer.writeValue(null, column);
private void writeValue(Object value, Schema s, int column) throws IOException { switch (s.getType()) { case STRING: if (value instanceof Utf8) // convert Utf8 to String value = value.toString(); break; case ENUM: if (value instanceof Enum) value = ((Enum)value).ordinal(); else value = s.getEnumOrdinal(value.toString()); break; case FIXED: value = ((GenericFixed)value).bytes(); break; } writer.writeValue(value, column); }
private void writeHeader(OutputStream out) throws IOException { OutputBuffer header = new OutputBuffer(); header.write(MAGIC); // magic header.writeFixed64(rowCount); // row count header.writeFixed32(columnCount); // column count metaData.write(header); // file metadata for (ColumnOutputBuffer column : columns) column.getMeta().write(header); // column metadata for (long start : computeStarts(header.size())) header.writeFixed64(start); // column starts header.writeTo(out); }
/** Construct given metadata for each column in the file. */ public ColumnFileWriter(ColumnFileMetaData fileMeta, ColumnMetaData... columnMeta) throws IOException { checkColumns(columnMeta); this.metaData = fileMeta; this.columnCount = columnMeta.length; this.columns = new ColumnOutputBuffer[columnCount]; for (int i = 0; i < columnCount; i++) { ColumnMetaData c = columnMeta[i]; c.setDefaults(metaData); columns[i] = c.isArray() ? new ArrayColumnOutputBuffer(this, c) : new ColumnOutputBuffer(this, c); size += OutputBuffer.BLOCK_SIZE; // over-estimate } }
/**
 * Writes {@code COUNT} random longs to a single LONG column, reads the
 * file back, and checks that the same values come out in the same order.
 */
@Test public void testLongs() throws Exception {
  FILE.delete();

  ColumnFileWriter out =
    new ColumnFileWriter(createFileMeta(),
                         new ColumnMetaData("test", ValueType.LONG));
  Random random = TestUtil.createRandom();
  for (int i = 0; i < COUNT; i++)
    out.writeRow(random.nextLong());
  out.writeTo(FILE);

  // NOTE(review): relies on TestUtil.createRandom() yielding the same
  // sequence on every call (presumably a fixed seed) -- confirm.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<Long> i = in.getValues("test");
    int count = 0;
    while (i.hasNext()) {
      Assert.assertEquals(random.nextLong(), (long)i.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // The reader was previously never closed; release it whether or not
    // the assertions pass.
    in.close();
  }
}
/**
 * Adds a row to the file, taking the value for column {@code i} from
 * {@code row[i]}.
 *
 * <p>Exactly {@code columnCount} entries are read from {@code row}; any
 * further entries are not looked at.
 *
 * @param row the values of the row, one per column, in column order
 * @throws IOException if writing a value fails
 */
public void writeRow(Object... row) throws IOException {
  startRow();
  int column = 0;
  while (column < columnCount) {
    writeValue(row[column], column);
    column++;
  }
  endRow();
}
/**
 * Writes a file with no columns and no rows, then checks that a reader
 * reports zero rows and zero columns.
 */
@Test public void testEmptyFile() throws Exception {
  FILE.delete();
  ColumnFileWriter out = new ColumnFileWriter(createFileMeta());
  out.writeTo(FILE);
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(0, in.getRowCount());
    Assert.assertEquals(0, in.getColumnCount());
  } finally {
    // Close the reader even when an assertion above fails, so a failing
    // test does not leave the file open.
    in.close();
  }
}