/**
 * Construct a reader for a file.
 *
 * <p>Opens a {@link ColumnFileReader} over {@code params.input}, parses the
 * schema stored in the file metadata under
 * {@link AvroColumnWriter#SCHEMA_KEY}, and picks the schema to read with:
 * {@code params.schema} when it is non-null, otherwise the file's own schema.
 *
 * @param params supplies the input, the data model and an optional read schema
 * @throws IOException if opening the file or initializing the reader fails
 *         with an I/O error
 */
public AvroColumnReader(Params params) throws IOException {
  this.reader = new ColumnFileReader(params.input);
  this.model = params.model;
  // Schema.parse(String) is deprecated; Schema.Parser is its documented replacement.
  this.fileSchema = new Schema.Parser()
      .parse(reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
  this.readSchema = params.schema == null ? fileSchema : params.schema;
  initialize();
}
/** Read a Trevni file and print each row as a JSON object. */ public void dump(Input input, PrintStream out, boolean pretty) throws IOException { this.generator = FACTORY.createJsonGenerator(out, JsonEncoding.UTF8); if (pretty) { generator.useDefaultPrettyPrinter(); } else { // ensure newline separation MinimalPrettyPrinter pp = new MinimalPrettyPrinter(); pp.setRootValueSeparator(System.getProperty("line.separator")); generator.setPrettyPrinter(pp); } this.reader = new ColumnFileReader(input); generator.writeStartObject(); generator.writeNumberField("rowCount", reader.getRowCount()); generator.writeNumberField("columnCount", reader.getColumnCount()); generator.writeFieldName("metadata"); dump(reader.getMetaData()); generator.writeFieldName("columns"); generator.writeStartArray(); for (ColumnMetaData c : reader.getColumnMetaData()) dump(c); generator.writeEndArray(); generator.writeEndObject(); generator.flush(); out.println(); reader.close(); }
// Excerpt: open a ColumnFileReader over `input` and keep it in the `reader` field.
this.reader = new ColumnFileReader(input);
// Excerpt: open FILE for reading and fetch the values of column "test", typed as Long.
// NOTE(review): `in` is not closed within the visible lines -- confirm the caller closes it.
ColumnFileReader in = new ColumnFileReader(FILE); ColumnValues<Long> v = in.getValues("test");
// Excerpt: open FILE for reading and fetch the values of column "test", typed as String.
// NOTE(review): `in` is not closed within the visible lines -- confirm the caller closes it.
ColumnFileReader in = new ColumnFileReader(FILE); ColumnValues<String> v = in.getValues("test");
/**
 * A file written without any columns or rows must read back with a row
 * count of zero and a column count of zero.
 */
@Test
public void testEmptyFile() throws Exception {
  FILE.delete();

  // Write a file that declares no columns and contains no rows.
  final ColumnFileWriter writer = new ColumnFileWriter(createFileMeta());
  writer.writeTo(FILE);

  final ColumnFileReader fileReader = new ColumnFileReader(FILE);
  Assert.assertEquals(0, fileReader.getRowCount());
  Assert.assertEquals(0, fileReader.getColumnCount());
  fileReader.close();
}
/**
 * Writes COUNT random longs into a single LONG column named "test" and
 * verifies that they are read back in the same order.
 */
@Test
public void testLongs() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.LONG));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(random.nextLong());
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<Long> values = in.getValues("test");
    int count = 0;
    while (values.hasNext()) {
      Assert.assertEquals(random.nextLong(), (long) values.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
/**
 * A file that declares one INT column named "test" but holds no rows must
 * report zero rows, one column, and yield no values for that column.
 */
@Test
public void testEmptyColumn() throws Exception {
  FILE.delete();

  // Declare the column but write no rows.
  final ColumnFileWriter writer =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.INT));
  writer.writeTo(FILE);

  final ColumnFileReader fileReader = new ColumnFileReader(FILE);
  Assert.assertEquals(0, fileReader.getRowCount());
  Assert.assertEquals(1, fileReader.getColumnCount());

  final ColumnValues<Integer> columnValues = fileReader.getValues("test");
  for (int unexpected : columnValues) {
    // Any value at all means the empty column was not read back as empty.
    throw new Exception("no value should be found");
  }
  fileReader.close();
}
/**
 * Writes COUNT rows into two columns -- "a" (FIXED32) and "b" (STRING) --
 * and verifies that both columns read back the written values in order.
 */
@Test
public void testTwoColumn() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(),
                           new ColumnMetaData("a", ValueType.FIXED32),
                           new ColumnMetaData("b", ValueType.STRING));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(random.nextInt(), TestUtil.randomString(random));
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(2, in.getColumnCount());
    // Column "a" was written from random.nextInt(), so it is read as Integer.
    Iterator<Integer> i = in.getValues("a");
    Iterator<String> j = in.getValues("b");
    int count = 0;
    while (i.hasNext() && j.hasNext()) {
      // The (int) cast selects the primitive assertEquals overload, as testInts does.
      Assert.assertEquals(random.nextInt(), (int) i.next());
      Assert.assertEquals(TestUtil.randomString(random), j.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
/**
 * Writes COUNT random strings into a single STRING column named "test" and
 * verifies that they are read back in the same order.
 */
@Test
public void testStrings() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.STRING));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(TestUtil.randomString(random));
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<String> values = in.getValues("test");
    int count = 0;
    while (values.hasNext()) {
      Assert.assertEquals(TestUtil.randomString(random), values.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
/**
 * Writes COUNT values produced by {@code TestUtil.randomLength} into a
 * single INT column named "test" and verifies that they are read back in
 * the same order.
 */
@Test
public void testInts() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.INT));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(TestUtil.randomLength(random));
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<Integer> values = in.getValues("test");
    int count = 0;
    while (values.hasNext()) {
      Assert.assertEquals(TestUtil.randomLength(random), (int) values.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
public TrevniScanner(Configuration conf, Schema schema, TableMeta meta, FileFragment fragment) throws IOException { super(conf, schema, meta, fragment); reader = new ColumnFileReader(new HadoopInput(fragment.getPath(), conf)); }
/**
 * Construct a reader for a file.
 *
 * <p>Opens a {@link ColumnFileReader} over {@code params.input}, parses the
 * schema stored in the file metadata under
 * {@link AvroColumnWriter#SCHEMA_KEY}, and picks the schema to read with:
 * {@code params.schema} when it is non-null, otherwise the file's own schema.
 *
 * @param params supplies the input, the data model and an optional read schema
 * @throws IOException if opening the file or initializing the reader fails
 *         with an I/O error
 */
public AvroColumnReader(Params params) throws IOException {
  this.reader = new ColumnFileReader(params.input);
  this.model = params.model;
  // Schema.parse(String) is deprecated; Schema.Parser is its documented replacement.
  this.fileSchema = new Schema.Parser()
      .parse(reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
  this.readSchema = params.schema == null ? fileSchema : params.schema;
  initialize();
}
// Excerpt: open FILE for reading and fetch the values of column "test", typed as Long.
// NOTE(review): `in` is not closed within the visible lines -- confirm the caller closes it.
ColumnFileReader in = new ColumnFileReader(FILE); ColumnValues<Long> v = in.getValues("test");
/**
 * A file written without any columns or rows must read back with a row
 * count of zero and a column count of zero.
 */
@Test
public void testEmptyFile() throws Exception {
  FILE.delete();

  // Write a file that declares no columns and contains no rows.
  final ColumnFileWriter writer = new ColumnFileWriter(createFileMeta());
  writer.writeTo(FILE);

  final ColumnFileReader fileReader = new ColumnFileReader(FILE);
  Assert.assertEquals(0, fileReader.getRowCount());
  Assert.assertEquals(0, fileReader.getColumnCount());
  fileReader.close();
}
/**
 * A file that declares one INT column named "test" but holds no rows must
 * report zero rows, one column, and yield no values for that column.
 */
@Test
public void testEmptyColumn() throws Exception {
  FILE.delete();

  // Declare the column but write no rows.
  final ColumnFileWriter writer =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.INT));
  writer.writeTo(FILE);

  final ColumnFileReader fileReader = new ColumnFileReader(FILE);
  Assert.assertEquals(0, fileReader.getRowCount());
  Assert.assertEquals(1, fileReader.getColumnCount());

  final ColumnValues<Integer> columnValues = fileReader.getValues("test");
  for (int unexpected : columnValues) {
    // Any value at all means the empty column was not read back as empty.
    throw new Exception("no value should be found");
  }
  fileReader.close();
}
/**
 * Writes COUNT random longs into a single LONG column named "test" and
 * verifies that they are read back in the same order.
 */
@Test
public void testLongs() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.LONG));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(random.nextLong());
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<Long> values = in.getValues("test");
    int count = 0;
    while (values.hasNext()) {
      Assert.assertEquals(random.nextLong(), (long) values.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
/**
 * Writes COUNT rows into two columns -- "a" (FIXED32) and "b" (STRING) --
 * and verifies that both columns read back the written values in order.
 */
@Test
public void testTwoColumn() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(),
                           new ColumnMetaData("a", ValueType.FIXED32),
                           new ColumnMetaData("b", ValueType.STRING));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(random.nextInt(), TestUtil.randomString(random));
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(2, in.getColumnCount());
    // Column "a" was written from random.nextInt(), so it is read as Integer.
    Iterator<Integer> i = in.getValues("a");
    Iterator<String> j = in.getValues("b");
    int count = 0;
    while (i.hasNext() && j.hasNext()) {
      // The (int) cast selects the primitive assertEquals overload, as testInts does.
      Assert.assertEquals(random.nextInt(), (int) i.next());
      Assert.assertEquals(TestUtil.randomString(random), j.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
/**
 * Writes COUNT values produced by {@code TestUtil.randomLength} into a
 * single INT column named "test" and verifies that they are read back in
 * the same order.
 */
@Test
public void testInts() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.INT));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(TestUtil.randomLength(random));
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<Integer> values = in.getValues("test");
    int count = 0;
    while (values.hasNext()) {
      Assert.assertEquals(TestUtil.randomLength(random), (int) values.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}
/**
 * Writes COUNT random strings into a single STRING column named "test" and
 * verifies that they are read back in the same order.
 */
@Test
public void testStrings() throws Exception {
  FILE.delete();
  ColumnFileWriter out =
      new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.STRING));
  Random random = TestUtil.createRandom();
  for (int row = 0; row < COUNT; row++) {
    out.writeRow(TestUtil.randomString(random));
  }
  out.writeTo(FILE);

  // Fresh generator for the expected values; presumably createRandom()
  // yields the same sequence on every call -- confirm in TestUtil.
  random = TestUtil.createRandom();
  ColumnFileReader in = new ColumnFileReader(FILE);
  try {
    Assert.assertEquals(COUNT, in.getRowCount());
    Assert.assertEquals(1, in.getColumnCount());
    Iterator<String> values = in.getValues("test");
    int count = 0;
    while (values.hasNext()) {
      Assert.assertEquals(TestUtil.randomString(random), values.next());
      count++;
    }
    Assert.assertEquals(COUNT, count);
  } finally {
    // Close the reader even when an assertion fails, as the sibling tests do on success.
    in.close();
  }
}