/**
 * Creates a VarChar from a Java string.
 *
 * @param obj the string value; when {@code null}, {@code null} is handed to the
 *            superclass constructor instead of a {@link Text} wrapper
 */
public VarChar(String obj) {
  super(obj != null ? new Text(obj) : null);
}
/**
 * Copies the bytes between {@code holder.start} and {@code holder.end} out of
 * {@code holder.buffer} and returns them as a new {@link Text}.
 *
 * @return a freshly allocated Text holding a copy of the holder's bytes
 */
@Override
public Text readText() {
  final byte[] bytes = new byte[holder.end - holder.start];
  holder.buffer.getBytes(holder.start, bytes, 0, bytes.length);

  final Text result = new Text();
  result.set(bytes);
  return result;
}
/**
 * Returns the variable length element at the given index wrapped in a {@link Text}.
 *
 * <p>The bytes are obtained through {@code get(index)}; if that call throws
 * {@link IllegalStateException}, the element is reported as {@code null}.
 *
 * @param index position of element to get
 * @return Text object for non-null element, null otherwise
 */
public Text getObject(int index) {
  final byte[] bytes;
  try {
    bytes = get(index);
  } catch (IllegalStateException e) {
    // get(index) signals "no value here" by throwing; callers of this method see null instead.
    return null;
  }

  final Text text = new Text();
  text.set(bytes);
  return text;
}
/**
 * Returns the holder's bytes as a new {@link Text}, or {@code null} when {@code isSet()}
 * reports that no value is present.
 *
 * @return a Text holding a copy of the bytes in {@code [holder.start, holder.end)} of
 *         {@code holder.buffer}, or {@code null} if the value is not set
 */
@Override
public Text readText() {
  if (isSet()) {
    final int byteCount = holder.end - holder.start;
    final byte[] bytes = new byte[byteCount];
    holder.buffer.getBytes(holder.start, bytes, 0, byteCount);

    final Text result = new Text();
    result.set(bytes);
    return result;
  }
  return null;
}
@Test public void testFileWithNulls() throws Exception { HashMap<String, FieldInfo> fields3 = new HashMap<>(); ParquetTestProperties props3 = new ParquetTestProperties(1, 3000, DEFAULT_BYTES_PER_PAGE, fields3); // actually include null values Object[] valuesWithNull = {new Text(""), new Text("longer string"), null}; props3.fields.put("a", new FieldInfo("boolean", "a", 1, valuesWithNull, TypeProtos.MinorType.BIT, props3)); testParquetFullEngineEventBased(false, "/parquet/parquet_scan_screen_read_entry_replace.json", "\"/tmp/nullable_with_nulls.parquet\"", "unused", 1, props3); }
/**
 * Builds a {@link JsonStringHashMap} from a flat sequence of alternating keys and values.
 *
 * <p>Every key immediately precedes its value. For instance,
 * {@code mapOf("name", "Adam", "age", 41)} corresponds to
 * {@code {"name": "Adam", "age": 41}} in JSON.
 *
 * <p>Values that are {@link CharSequence}s are stored as {@link Text} (built from their
 * {@code toString()}); every other value is stored as given. Keys are cast to {@link String}.
 *
 * @param keyValueSequence alternating keys and values; the length must be even, which is
 *                         enforced through {@code Preconditions.checkArgument}
 * @return a new map holding the given pairs
 */
public static JsonStringHashMap<String, Object> mapOf(Object... keyValueSequence) {
  Preconditions.checkArgument(keyValueSequence.length % 2 == 0, "Length of key value sequence must be even");

  final JsonStringHashMap<String, Object> result = new JsonStringHashMap<>();
  int pos = 0;
  while (pos < keyValueSequence.length) {
    final Object rawValue = keyValueSequence[pos + 1];
    final Object stored = rawValue instanceof CharSequence ? new Text(rawValue.toString()) : rawValue;
    result.put((String) keyValueSequence[pos], stored);
    pos += 2;
  }
  return result;
}
/**
 * Builds a {@link JsonStringArrayList} containing the given values in order.
 *
 * <p>Values that are {@link CharSequence}s are added as {@link Text} (built from their
 * {@code toString()}); every other value is added as given.
 *
 * @param values the elements to put in the list
 * @return a new list holding the (possibly converted) values
 */
public static JsonStringArrayList<Object> listOf(Object... values) {
  final JsonStringArrayList<Object> result = new JsonStringArrayList<>();
  for (int i = 0; i < values.length; i++) {
    final Object element = values[i];
    result.add(element instanceof CharSequence ? new Text(element.toString()) : element);
  }
  return result;
}
@Test /** * Tests the reading of nullable var length columns, runs the tests twice, once on a file that has * a converted type of UTF-8 to make sure it can be read */ public void testNullableColumnsVarLen() throws Exception { HashMap<String, FieldInfo> fields = new HashMap<>(); ParquetTestProperties props = new ParquetTestProperties(1, 300000, DEFAULT_BYTES_PER_PAGE, fields); byte[] val = {'b'}; byte[] val2 = {'b', '2'}; byte[] val3 = {'b', '3'}; byte[] val4 = { 'l','o','n','g','e','r',' ','s','t','r','i','n','g'}; Object[] byteArrayVals = { val, val2, val4}; props.fields.put("a", new FieldInfo("boolean", "a", 1, byteArrayVals, TypeProtos.MinorType.BIT, props)); testParquetFullEngineEventBased(false, "/parquet/parquet_nullable_varlen.json", "/tmp/nullable_varlen.parquet", 1, props); HashMap<String, FieldInfo> fields2 = new HashMap<>(); // pass strings instead of byte arrays Object[] textVals = { new org.apache.arrow.vector.util.Text("b"), new org.apache.arrow.vector.util.Text("b2"), new org.apache.arrow.vector.util.Text("b3") }; ParquetTestProperties props2 = new ParquetTestProperties(1, 30000, DEFAULT_BYTES_PER_PAGE, fields2); props2.fields.put("a", new FieldInfo("boolean", "a", 1, textVals, TypeProtos.MinorType.BIT, props2)); testParquetFullEngineEventBased(false, "/parquet/parquet_scan_screen_read_entry_replace.json", "\"/tmp/varLen.parquet/a\"", "unused", 1, props2); }
/**
 * Converts a value read from Avro into the representation used on the Dremio side.
 *
 * <ul>
 *   <li>{@link ByteBuffer}: the remaining bytes are copied into a new {@code byte[]}; the
 *       source buffer's position is left untouched.</li>
 *   <li>{@link CharSequence}: wrapped in {@link Text}, but only when {@code root} is false.</li>
 *   <li>{@code GenericData.Array}: converted element by element into a
 *       {@link JsonStringArrayList}.</li>
 *   <li>{@code GenericData.EnumSymbol}: replaced by its {@code toString()} value.</li>
 *   <li>{@code GenericData.Record}: converted field by field into a {@link JsonStringHashMap}
 *       keyed by field name.</li>
 *   <li>Anything else (including {@code null}) is returned unchanged.</li>
 * </ul>
 *
 * @param value the Avro value to convert
 * @param root  {@code true} for the outermost call; nested elements and fields are always
 *              converted with {@code root == false}
 * @return the converted value, or {@code value} itself when no conversion applies
 */
private Object convertAvroValToDremio(Object value, boolean root) {
  if (value instanceof ByteBuffer) {
    // Read through a duplicate so the caller's buffer keeps its own position. Reading directly
    // and then forcing position(0) would rewind past the original position whenever the buffer
    // did not start at 0.
    final ByteBuffer view = ((ByteBuffer) value).duplicate();
    final byte[] bytes = new byte[view.remaining()];
    view.get(bytes);
    value = bytes;
  } else if (!root && value instanceof CharSequence) {
    value = new Text(value.toString());
  } else if (value instanceof GenericData.Array) {
    final GenericData.Array array = (GenericData.Array) value;
    final JsonStringArrayList<Object> list = new JsonStringArrayList<>();
    for (Object element : array) {
      list.add(convertAvroValToDremio(element, false));
    }
    value = list;
  } else if (value instanceof GenericData.EnumSymbol) {
    value = value.toString();
  } else if (value instanceof GenericData.Record) {
    final GenericData.Record rec = (GenericData.Record) value;
    final JsonStringHashMap<String, Object> newRecord = new JsonStringHashMap<>();
    for (Schema.Field field : rec.getSchema().getFields()) {
      final Object fieldValue = rec.get(field.name());
      newRecord.put(field.name(), convertAvroValToDremio(fieldValue, false));
    }
    value = newRecord;
  }
  return value;
}
/**
 * Loads two rows whose {@code ip_field} column holds arrays of IPv4 address strings and checks
 * that selecting the column returns each array as a list of {@link Text} values.
 */
@Test
public void testArrayOfIPv4() throws Exception {
  final String ip1 = "10.0.0.1";
  final String ip2 = "192.168.0.1";
  final String ip3 = "10.0.8.6";
  final String ip4 = "10.0.8.5";

  final Object[][] rows = {
      {ip1, ip2, ip3},
      {ip4, ip1}
  };
  final ElasticsearchCluster.ColumnData[] data = {
      new ElasticsearchCluster.ColumnData("ip_field", IP, null, rows)
  };
  elastic.load(schema, table, data);

  // Expected baseline: one list of Text per loaded row, in the same element order.
  final JsonStringArrayList<Text> expectedFirstRow = new JsonStringArrayList<>();
  for (String ip : new String[] {ip1, ip2, ip3}) {
    expectedFirstRow.add(new Text(ip));
  }
  final JsonStringArrayList<Text> expectedSecondRow = new JsonStringArrayList<>();
  for (String ip : new String[] {ip4, ip1}) {
    expectedSecondRow.add(new Text(ip));
  }

  testBuilder()
      .sqlQuery("select ip_field from elasticsearch." + schema + "." + table)
      .baselineColumns("ip_field")
      .unOrdered()
      .baselineValues(expectedFirstRow)
      .baselineValues(expectedSecondRow)
      .go();
}