/** * Reads the schema and initializes the vectors. */ private void initialize() throws IOException { Schema originalSchema = readSchema(); List<Field> fields = new ArrayList<>(); List<FieldVector> vectors = new ArrayList<>(); Map<Long, Dictionary> dictionaries = new HashMap<>(); // Convert fields with dictionaries to have the index type for (Field field : originalSchema.getFields()) { Field updated = DictionaryUtility.toMemoryFormat(field, allocator, dictionaries); fields.add(updated); vectors.add(updated.createVector(allocator)); } Schema schema = new Schema(fields, originalSchema.getCustomMetadata()); this.root = new VectorSchemaRoot(schema, vectors, 0); this.loader = new VectorLoader(root); this.dictionaries = Collections.unmodifiableMap(dictionaries); // Read and load all dictionaries from schema for (int i = 0; i < dictionaries.size(); i++) { ArrowDictionaryBatch dictionaryBatch = readDictionary(); loadDictionary(dictionaryBatch); } }
public void start(Schema schema, DictionaryProvider provider) throws IOException { List<Field> fields = new ArrayList<>(schema.getFields().size()); Set<Long> dictionaryIdsUsed = new HashSet<>(); this.schema = schema; // Store original Schema to ensure batches written match // Convert fields with dictionaries to have dictionary type for (Field field : schema.getFields()) { fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed)); } Schema updatedSchema = new Schema(fields, schema.getCustomMetadata()); generator.writeStartObject(); generator.writeObjectField("schema", updatedSchema); // Write all dictionaries that were used if (!dictionaryIdsUsed.isEmpty()) { writeDictionaryBatches(generator, dictionaryIdsUsed, provider); } // Start writing of record batches generator.writeArrayFieldStart("batches"); }
updatedChildren.add(toMessageFormat(child, provider, dictionaryIdsUsed));
public Schema start() throws JsonParseException, IOException { readToken(START_OBJECT); { Schema originalSchema = readNextField("schema", Schema.class); List<Field> fields = new ArrayList<>(); dictionaries = new HashMap<>(); // Convert fields with dictionaries to have the index type for (Field field : originalSchema.getFields()) { fields.add(DictionaryUtility.toMemoryFormat(field, allocator, dictionaries)); } this.schema = new Schema(fields, originalSchema.getCustomMetadata()); if (!dictionaries.isEmpty()) { nextFieldIs("dictionaries"); readDictionaryBatches(); } nextFieldIs("batches"); readToken(START_ARRAY); started = true; return this.schema; } }
fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed));
updatedChildren.add(toMemoryFormat(child, allocator, dictionaries));