OrcReader

How to use OrcReader in io.prestosql.orc

Best Java code snippets using io.prestosql.orc.OrcReader (Showing top 20 results out of 315)
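
The snippets below share a handful of static imports and constants. A plausible import block, reconstructed from how the names are used — the exact packages are assumptions based on the prestosql/presto-orc codebase of this era:

// Assumed import locations; verify against your presto-orc version.
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;
import static io.prestosql.orc.OrcEncoding.ORC;
import static io.prestosql.orc.OrcReader.INITIAL_BATCH_SIZE;
import static io.prestosql.orc.OrcReader.MAX_BATCH_SIZE;
import static java.util.Objects.requireNonNull;
import static org.joda.time.DateTimeZone.UTC;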

origin: prestosql/presto

public TempFileReader(List<Type> types, OrcDataSource dataSource)
{
  this.types = ImmutableList.copyOf(requireNonNull(types, "types is null"));
  try {
    OrcReader orcReader = new OrcReader(
        dataSource,
        ORC,
        new DataSize(1, MEGABYTE),
        new DataSize(8, MEGABYTE),
        new DataSize(8, MEGABYTE),
        new DataSize(16, MEGABYTE));
    Map<Integer, Type> includedColumns = new HashMap<>();
    for (int i = 0; i < types.size(); i++) {
      includedColumns.put(i, types.get(i));
    }
    reader = orcReader.createRecordReader(
        includedColumns,
        OrcPredicate.TRUE,
        UTC,
        newSimpleAggregatedMemoryContext(),
        INITIAL_BATCH_SIZE);
  }
  catch (IOException e) {
    throw new PrestoException(HIVE_WRITER_DATA_ERROR, "Failed to read temporary data");
  }
}
origin: prestosql/presto

OrcReader reader = new OrcReader(dataSource, ORC, readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize(), readerAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE);
Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames());
ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
// ... (the loop that populates includedColumns and columnIndexes from indexMap is elided in this snippet)
OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, UTC, systemMemoryUsage, INITIAL_BATCH_SIZE);
origin: prestosql/presto

private List<ColumnInfo> getColumnInfo(OrcReader reader)
{
  Optional<OrcFileMetadata> metadata = getOrcFileMetadata(reader);
  if (metadata.isPresent()) {
    return getColumnInfoFromOrcUserMetadata(metadata.get());
  }
  // support for legacy files without metadata
  return getColumnInfoFromOrcColumnTypes(reader.getColumnNames(), reader.getFooter().getTypes());
}
origin: prestosql/presto

private static ColumnStats doComputeColumnStats(OrcReader orcReader, long columnId, Type type)
    throws IOException
{
  int columnIndex = columnIndex(orcReader.getColumnNames(), columnId);
  OrcRecordReader reader = orcReader.createRecordReader(ImmutableMap.of(columnIndex, type), OrcPredicate.TRUE, UTC, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE);
  if (type.equals(BooleanType.BOOLEAN)) {
    return indexBoolean(type, reader, columnIndex, columnId);
  }
  if (type.equals(BigintType.BIGINT) ||
      type.equals(DateType.DATE) ||
      type.equals(TimestampType.TIMESTAMP)) {
    return indexLong(type, reader, columnIndex, columnId);
  }
  if (type.equals(DoubleType.DOUBLE)) {
    return indexDouble(type, reader, columnIndex, columnId);
  }
  if (type instanceof VarcharType) {
    return indexString(type, reader, columnIndex, columnId);
  }
  return null;
}
origin: io.prestosql/presto-orc

public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold)
    throws IOException
{
  OrcReader orcReader = new OrcReader(orcDataSource, ORC, maxMergeDistance, maxReadSize, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE));
  // 1 for reading file footer
  assertEquals(orcDataSource.getReadCount(), 1);
  List<StripeInformation> stripes = orcReader.getFooter().getStripes();
  // Sanity check the number of stripes. This can be three or higher because of the ORC writer's low-memory mode.
  assertGreaterThanOrEqual(stripes.size(), 3);
  // verify the data source gets wrapped by CachingOrcDataSource for tiny stripes
  assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold), CachingOrcDataSource.class);
  OrcRecordReader orcRecordReader = orcReader.createRecordReader(
      ImmutableMap.of(0, VARCHAR),
      (numberOfRows, statisticsByColumnIndex) -> true,
      HIVE_STORAGE_TIME_ZONE,
      newSimpleAggregatedMemoryContext(),
      INITIAL_BATCH_SIZE);
  int positionCount = 0;
  while (true) {
    int batchSize = orcRecordReader.nextBatch();
    if (batchSize <= 0) {
      break;
    }
    Block block = orcRecordReader.readBlock(VARCHAR, 0);
    positionCount += block.getPositionCount();
  }
  assertEquals(positionCount, POSITION_COUNT);
}
origin: io.prestosql/presto-orc

@Test
public void testReadUserMetadata()
    throws Exception
{
  try (TempFile tempFile = new TempFile()) {
    Map<String, String> metadata = ImmutableMap.of(
        "a", "ala",
        "b", "ma",
        "c", "kota");
    createFileWithOnlyUserMetadata(tempFile.getFile(), metadata);
    OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true);
    OrcReader orcReader = new OrcReader(orcDataSource, ORC, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE));
    Footer footer = orcReader.getFooter();
    Map<String, String> readMetadata = Maps.transformValues(footer.getUserMetadata(), Slice::toStringAscii);
    assertEquals(readMetadata, metadata);
  }
}
origin: prestosql/presto

public static OrcRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types)
    throws IOException
{
  OrcReader orcReader = new OrcReader(dataSource, ORC, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE));
  List<String> columnNames = orcReader.getColumnNames();
  assertEquals(columnNames.size(), columnIds.size());
  Map<Integer, Type> includedColumns = new HashMap<>();
  int ordinal = 0;
  for (long columnId : columnIds) {
    assertEquals(columnNames.get(ordinal), String.valueOf(columnId));
    includedColumns.put(ordinal, types.get(ordinal));
    ordinal++;
  }
  return createRecordReader(orcReader, includedColumns);
}
origin: prestosql/presto

// Excerpt from the OrcReader constructor; the signature and elided lines are marked with "..."
    throws IOException
{
  orcDataSource = wrapWithCacheIfTiny(orcDataSource, tinyStripeThreshold);
  this.orcDataSource = orcDataSource;
  requireNonNull(orcEncoding, "orcEncoding is null");
  // ...
  if (!isValidHeaderMagic(orcDataSource)) {
    throw new OrcCorruptionException(orcDataSource.getId(), "Not an ORC file");
  }
  // ...
  checkOrcVersion(orcDataSource, postScript.getVersion());
  validateWrite(validation -> validation.getVersion().equals(postScript.getVersion()), "Unexpected version");
  validateWrite(validation -> validation.getCompression() == compressionKind, "Unexpected compression");
  validateWrite(validation -> validation.getColumnNames().equals(getColumnNames()), "Unexpected column names");
  validateWrite(validation -> validation.getRowGroupMaxRowCount() == footer.getRowsInRowGroup(), "Unexpected rows in group");
  if (writeValidation.isPresent()) {
    writeValidation.get().validateMetadata(orcDataSource.getId(), footer.getUserMetadata());
  }
  // ...
}
origin: prestosql/presto

public static OrcRecordReader createRecordReader(OrcReader orcReader, Map<Integer, Type> includedColumns)
{
  return orcReader.createRecordReader(includedColumns, OrcPredicate.TRUE, DateTimeZone.UTC, newSimpleAggregatedMemoryContext(), MAX_BATCH_SIZE);
}
origin: prestosql/presto

private List<ColumnStats> computeShardStats(File file)
{
  try (OrcDataSource dataSource = fileOrcDataSource(defaultReaderAttributes, file)) {
    OrcReader reader = new OrcReader(dataSource, ORC, defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getMaxReadSize(), defaultReaderAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE);
    ImmutableList.Builder<ColumnStats> list = ImmutableList.builder();
    for (ColumnInfo info : getColumnInfo(reader)) {
      computeColumnStats(reader, info.getColumnId(), info.getType()).ifPresent(list::add);
    }
    return list.build();
  }
  catch (IOException e) {
    throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + file, e);
  }
}
origin: prestosql/presto

  private static Map<String, Integer> buildPhysicalNameOrdinalMap(OrcReader reader)
  {
    ImmutableMap.Builder<String, Integer> physicalNameOrdinalMap = ImmutableMap.builder();

    int ordinal = 0;
    for (String physicalColumnName : reader.getColumnNames()) {
      physicalNameOrdinalMap.put(physicalColumnName, ordinal);
      ordinal++;
    }

    return physicalNameOrdinalMap.build();
  }
}
origin: prestosql/presto

private static Optional<OrcFileMetadata> getOrcFileMetadata(OrcReader reader)
{
  return Optional.ofNullable(reader.getFooter().getUserMetadata().get(OrcFileMetadata.KEY))
      .map(slice -> METADATA_CODEC.fromJson(slice.getBytes()));
}
origin: prestosql/presto

public static OrcRecordReader createReaderNoRows(OrcDataSource dataSource)
    throws IOException
{
  OrcReader orcReader = new OrcReader(dataSource, ORC, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE));
  assertEquals(orcReader.getColumnNames().size(), 0);
  return createRecordReader(orcReader, ImmutableMap.of());
}
origin: io.prestosql/presto-orc

public OrcRecordReader createRecordReader(
    Map<Integer, Type> includedColumns,
    OrcPredicate predicate,
    long offset,
    long length,
    DateTimeZone hiveStorageTimeZone,
    AggregatedMemoryContext systemMemoryUsage,
    int initialBatchSize)
{
  return new OrcRecordReader(
      requireNonNull(includedColumns, "includedColumns is null"),
      requireNonNull(predicate, "predicate is null"),
      footer.getNumberOfRows(),
      footer.getStripes(),
      footer.getFileStats(),
      metadata.getStripeStatsList(),
      orcDataSource,
      offset,
      length,
      footer.getTypes(),
      decompressor,
      footer.getRowsInRowGroup(),
      requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"),
      hiveWriterVersion,
      metadataReader,
      maxMergeDistance,
      tinyStripeThreshold,
      maxBlockSize,
      footer.getUserMetadata(),
      systemMemoryUsage,
      writeValidation,
      initialBatchSize);
}
origin: prestosql/presto

private static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColumnHandle> columns, boolean useOrcColumnNames, OrcReader reader, Path path)
{
  if (!useOrcColumnNames) {
    return columns;
  }
  verifyFileHasColumnNames(reader.getColumnNames(), path);
  Map<String, Integer> physicalNameOrdinalMap = buildPhysicalNameOrdinalMap(reader);
  int nextMissingColumnIndex = physicalNameOrdinalMap.size();
  ImmutableList.Builder<HiveColumnHandle> physicalColumns = ImmutableList.builder();
  for (HiveColumnHandle column : columns) {
    Integer physicalOrdinal = physicalNameOrdinalMap.get(column.getName());
    if (physicalOrdinal == null) {
      // if the column is missing from the file, assign it a column number larger
      // than the number of columns in the file so the reader will fill it with nulls
      physicalOrdinal = nextMissingColumnIndex;
      nextMissingColumnIndex++;
    }
    physicalColumns.add(new HiveColumnHandle(column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment()));
  }
  return physicalColumns.build();
}
origin: io.prestosql/presto-orc

  static void validateFile(
      OrcWriteValidation writeValidation,
      OrcDataSource input,
      List<Type> types,
      DateTimeZone hiveStorageTimeZone,
      OrcEncoding orcEncoding)
      throws OrcCorruptionException
  {
    ImmutableMap.Builder<Integer, Type> readTypes = ImmutableMap.builder();
    for (int columnIndex = 0; columnIndex < types.size(); columnIndex++) {
      readTypes.put(columnIndex, types.get(columnIndex));
    }
    try {
      OrcReader orcReader = new OrcReader(input, orcEncoding, new DataSize(1, MEGABYTE), new DataSize(8, MEGABYTE), new DataSize(8, MEGABYTE), new DataSize(16, MEGABYTE), Optional.of(writeValidation));
      try (OrcRecordReader orcRecordReader = orcReader.createRecordReader(readTypes.build(), OrcPredicate.TRUE, hiveStorageTimeZone, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE)) {
        while (orcRecordReader.nextBatch() >= 0) {
          // ignored
        }
      }
    }
    catch (IOException e) {
      throw new OrcCorruptionException(e, input.getId(), "Validation failed");
    }
  }
}
origin: io.prestosql/presto-orc

Footer footer = new OrcReader(orcDataSource, ORC, dataSize, dataSize, dataSize, dataSize).getFooter();
io.prestosql.orc.OrcReader

Most used methods (a minimal usage sketch follows this list)

  • <init>
  • createRecordReader
  • getColumnNames
  • getFooter
  • checkOrcVersion
    Check to see if this ORC file is from a future version and, if so, warn the user that we may not be able to read all of the column encodings.
  • isValidHeaderMagic
    Does the file start with the ORC magic bytes?
  • validateFile
  • validateWrite
  • wrapWithCacheIfTiny
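
The methods above compose into a single read path: construct the reader, inspect the footer, then drain batches from a record reader. A minimal sketch of that pattern, assuming this presto-orc API version; the file path, column type, and buffer sizes are illustrative assumptions, not values from any snippet above:

// Minimal sketch; /tmp/example.orc and the VARCHAR column are hypothetical.
OrcDataSource dataSource = new FileOrcDataSource(
    new File("/tmp/example.orc"),
    new DataSize(1, MEGABYTE),
    new DataSize(1, MEGABYTE),
    new DataSize(1, MEGABYTE),
    true);
OrcReader orcReader = new OrcReader(
    dataSource,
    ORC,
    new DataSize(1, MEGABYTE),   // maxMergeDistance
    new DataSize(8, MEGABYTE),   // maxReadSize
    new DataSize(8, MEGABYTE),   // tinyStripeThreshold
    new DataSize(16, MEGABYTE)); // maxBlockSize

// The footer carries column names, stripe layout, and user metadata.
List<String> columnNames = orcReader.getColumnNames();

// Read column 0 as VARCHAR; OrcPredicate.TRUE disables stripe and row-group pruning.
OrcRecordReader recordReader = orcReader.createRecordReader(
    ImmutableMap.of(0, VARCHAR),
    OrcPredicate.TRUE,
    UTC,
    newSimpleAggregatedMemoryContext(),
    INITIAL_BATCH_SIZE);
long rows = 0;
while (recordReader.nextBatch() > 0) {
  Block block = recordReader.readBlock(VARCHAR, 0);
  rows += block.getPositionCount();
}
recordReader.close();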
