public abstract class BaseParquetMetadataProvider extends Object implements ParquetMetadataProvider
Implementation of ParquetMetadataProvider
which contains base methods for obtaining metadata from
parquet statistics.
Modifier and Type | Class and Description |
---|---|
static class |
BaseParquetMetadataProvider.Builder<T extends BaseParquetMetadataProvider.Builder<T>> |
Modifier and Type | Field and Description |
---|---|
protected List<ReadEntryWithPath> |
entries |
protected Set<org.apache.hadoop.fs.Path> |
fileSet |
static Object |
NULL_VALUE
HashBasedTable cannot contain nulls, so this object is used to represent null values. |
protected MetadataBase.ParquetTableMetadataBase |
parquetTableMetadata |
protected ParquetReaderConfig |
readerConfig |
protected TupleMetadata |
schema |
protected DrillStatsTable |
statsTable |
protected org.apache.hadoop.fs.Path |
tableLocation |
protected String |
tableName |
Modifier | Constructor and Description |
---|---|
protected |
BaseParquetMetadataProvider(BaseParquetMetadataProvider.Builder<?> builder) |
Modifier and Type | Method and Description |
---|---|
boolean |
checkMetadataVersion()
Whether metadata actuality should be checked.
|
List<ReadEntryWithPath> |
getEntries()
Returns list of
ReadEntryWithPath instances which represent paths to files to be scanned. |
FileMetadata |
getFileMetadata(org.apache.hadoop.fs.Path location)
Returns
FileMetadata instance which corresponds to metadata of file for specified location. |
Set<org.apache.hadoop.fs.Path> |
getFileSet()
Returns list of file locations for table.
|
List<FileMetadata> |
getFilesForPartition(PartitionMetadata partition)
Returns list of
FileMetadata instances which belong to the specified partition. |
Map<org.apache.hadoop.fs.Path,FileMetadata> |
getFilesMetadataMap()
Returns map of
FileMetadata instances which provide metadata for specific files and their columns. |
List<org.apache.hadoop.fs.Path> |
getLocations()
Returns list of file paths which belong to current table.
|
NonInterestingColumnsMetadata |
getNonInterestingColumnsMetadata()
Returns
NonInterestingColumnsMetadata instance which provides metadata for non-interesting columns. |
List<SchemaPath> |
getPartitionColumns()
Returns list of partition columns for table from this
TableMetadataProvider. |
List<PartitionMetadata> |
getPartitionMetadata(SchemaPath columnName)
Returns list of
PartitionMetadata instances which correspond to partitions for the specified column
and provide metadata for specific partitions and their columns. |
List<PartitionMetadata> |
getPartitionsMetadata()
Returns list of
PartitionMetadata instances which provide metadata for specific partitions and their columns. |
List<RowGroupMetadata> |
getRowGroupsMeta()
Returns list of
RowGroupMetadata instances which provide metadata for specific row groups and their columns. |
org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> |
getRowGroupsMetadataMap()
Returns multimap of
RowGroupMetadata instances which provide metadata
for specific row groups and their columns mapped to their locations. |
Map<org.apache.hadoop.fs.Path,SegmentMetadata> |
getSegmentsMetadataMap()
Returns map of
SegmentMetadata instances which provide metadata for segments and their columns. |
TableMetadata |
getTableMetadata()
Returns
TableMetadata instance which provides metadata for the table and its columns. |
protected void |
init(BaseParquetMetadataProvider metadataProvider) |
void |
initializeMetadata()
Method which initializes all metadata kinds to get rid of parquetTableMetadata.
|
protected abstract void |
initInternal() |
public static final Object NULL_VALUE
HashBasedTable
cannot contain nulls, so this object is used to represent null values.
protected final List<ReadEntryWithPath> entries
protected final ParquetReaderConfig readerConfig
protected final String tableName
protected final org.apache.hadoop.fs.Path tableLocation
protected MetadataBase.ParquetTableMetadataBase parquetTableMetadata
protected Set<org.apache.hadoop.fs.Path> fileSet
protected TupleMetadata schema
protected DrillStatsTable statsTable
protected BaseParquetMetadataProvider(BaseParquetMetadataProvider.Builder<?> builder)
protected void init(BaseParquetMetadataProvider metadataProvider) throws IOException
IOException
public void initializeMetadata()
public NonInterestingColumnsMetadata getNonInterestingColumnsMetadata()
TableMetadataProvider
NonInterestingColumnsMetadata
instance which provides metadata for non-interesting columns.
getNonInterestingColumnsMetadata
in interface TableMetadataProvider
NonInterestingColumnsMetadata
instance
public TableMetadata getTableMetadata()
TableMetadataProvider
TableMetadata
instance which provides metadata for the table and its columns.
getTableMetadata
in interface TableMetadataProvider
TableMetadata
instance
public List<SchemaPath> getPartitionColumns()
TableMetadataProvider
TableMetadataProvider.
getPartitionColumns
in interface TableMetadataProvider
public List<PartitionMetadata> getPartitionsMetadata()
TableMetadataProvider
PartitionMetadata
instances which provide metadata for specific partitions and their columns.
getPartitionsMetadata
in interface TableMetadataProvider
PartitionMetadata
instances
public List<PartitionMetadata> getPartitionMetadata(SchemaPath columnName)
TableMetadataProvider
PartitionMetadata
instances which correspond to partitions for the specified column
and provide metadata for specific partitions and their columns.
getPartitionMetadata
in interface TableMetadataProvider
PartitionMetadata
instances which correspond to partitions for the specified column
public FileMetadata getFileMetadata(org.apache.hadoop.fs.Path location)
TableMetadataProvider
FileMetadata
instance which corresponds to metadata of the file for the specified location.
getFileMetadata
in interface TableMetadataProvider
location
- location of the file
FileMetadata
instance which corresponds to metadata of the file for the specified location
public List<FileMetadata> getFilesForPartition(PartitionMetadata partition)
TableMetadataProvider
FileMetadata
instances which belong to the specified partition.
getFilesForPartition
in interface TableMetadataProvider
partition
- partition whose files should be returned
FileMetadata
instances which belong to the specified partition
public Map<org.apache.hadoop.fs.Path,SegmentMetadata> getSegmentsMetadataMap()
TableMetadataProvider
SegmentMetadata
instances which provide metadata for segments and their columns.
getSegmentsMetadataMap
in interface TableMetadataProvider
SegmentMetadata
instances
public Map<org.apache.hadoop.fs.Path,FileMetadata> getFilesMetadataMap()
TableMetadataProvider
FileMetadata
instances which provide metadata for specific files and their columns.
getFilesMetadataMap
in interface TableMetadataProvider
FileMetadata
instances
public List<ReadEntryWithPath> getEntries()
ParquetMetadataProvider
ReadEntryWithPath
instances which represent paths to files to be scanned.
getEntries
in interface ParquetMetadataProvider
ReadEntryWithPath
instances with file paths
public Set<org.apache.hadoop.fs.Path> getFileSet()
ParquetMetadataProvider
getFileSet
in interface ParquetMetadataProvider
public List<RowGroupMetadata> getRowGroupsMeta()
ParquetMetadataProvider
RowGroupMetadata
instances which provide metadata for specific row groups and their columns.
getRowGroupsMeta
in interface ParquetMetadataProvider
RowGroupMetadata
instances
public List<org.apache.hadoop.fs.Path> getLocations()
ParquetMetadataProvider
getLocations
in interface ParquetMetadataProvider
public org.apache.drill.shaded.guava.com.google.common.collect.Multimap<org.apache.hadoop.fs.Path,RowGroupMetadata> getRowGroupsMetadataMap()
ParquetMetadataProvider
RowGroupMetadata
instances which provide metadata
for specific row groups and their columns mapped to their locations.
getRowGroupsMetadataMap
in interface ParquetMetadataProvider
RowGroupMetadata
instances
public boolean checkMetadataVersion()
TableMetadataProvider
checkMetadataVersion
in interface TableMetadataProvider
protected abstract void initInternal() throws IOException
IOException
Copyright © 2021 The Apache Software Foundation. All rights reserved.