/*
 * Decompiled with CFR 0.152.
 */
package org.apache.flink.formats.parquet;

import java.io.IOException;
import java.util.ArrayList;
import javax.annotation.Nullable;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.CheckpointableInputFormat;
import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;
import org.apache.flink.formats.parquet.utils.ParquetRecordReader;
import org.apache.flink.formats.parquet.utils.ParquetSchemaConverter;
import org.apache.flink.formats.parquet.utils.RowReadSupport;
import org.apache.flink.metrics.Counter;
import org.apache.flink.types.Row;
import org.apache.flink.util.Preconditions;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.InputFile;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base input format that reads Parquet files as {@link Row} records through a
 * {@link ParquetRecordReader} and hands each row to {@link #convert(Row)} to produce the
 * element type {@code E}.
 *
 * <p>The set of fields to read is either derived from the expected schema passed to the
 * constructor, narrowed through {@link #selectFields(String[])}, or (when neither is available)
 * taken from the schema of the file being opened.
 *
 * <p>The checkpoint state is the {@code Tuple2<Long, Long>} reported by
 * {@link ParquetRecordReader#getCurrentReadPosition()} and is passed back to
 * {@link ParquetRecordReader#seek} on {@link #reopen}.
 *
 * @param <E> type of the records produced by this format
 */
public abstract class ParquetInputFormat<E>
extends FileInputFormat<E>
implements CheckpointableInputFormat<FileInputSplit, Tuple2<Long, Long>> {
    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(ParquetInputFormat.class);

    /** When set, a split whose file schema does not match the requested fields is skipped instead of failing. */
    private boolean skipWrongSchemaFileSplit = false;
    /** Forwarded to the record reader via {@code setSkipCorruptedRecord}. */
    private boolean skipCorruptedRecord = false;
    /** Set by {@link #getReadSchema} while opening a split that must be skipped. */
    private boolean skipThisSplit = false;
    @Nullable
    private TypeInformation[] fieldTypes;
    @Nullable
    private String[] fieldNames;
    private FilterPredicate filterPredicate;
    private transient Counter recordConsumed;
    /** Transient: not available after deserialization, in which case the file schema is used in {@link #open}. */
    @Nullable
    private transient MessageType expectedFileSchema;
    private transient ParquetRecordReader<Row> parquetRecordReader;

    /** Configuration key: skip splits whose schema does not match instead of throwing. */
    public static final String PARQUET_SKIP_WRONG_SCHEMA_SPLITS = "skip.splits.wrong.schema";
    /** Configuration key: value forwarded to the record reader's skip-corrupted-record switch. */
    public static final String PARQUET_SKIP_CORRUPTED_RECORD = "skip.corrupted.record";

    /**
     * Creates the format for the given path.
     *
     * @param path path of the file(s) to read
     * @param messageType expected Parquet schema; may be {@code null}, in which case field names
     *     and types are resolved from the file schema when a split is opened
     */
    protected ParquetInputFormat(Path path, MessageType messageType) {
        super(path);
        this.expectedFileSchema = messageType;
        if (this.expectedFileSchema != null) {
            RowTypeInfo rowTypeInfo = (RowTypeInfo) ParquetSchemaConverter.fromParquetType(this.expectedFileSchema);
            this.fieldTypes = rowTypeInfo.getFieldTypes();
            this.fieldNames = rowTypeInfo.getFieldNames();
        }
        this.unsplittable = true;
    }

    /**
     * Reads the skip flags from the configuration. A flag that is already {@code true} is kept;
     * otherwise the value stored under the corresponding key (default {@code false}) is used.
     *
     * @param parameters configuration to read {@link #PARQUET_SKIP_WRONG_SCHEMA_SPLITS} and
     *     {@link #PARQUET_SKIP_CORRUPTED_RECORD} from
     */
    public void configure(Configuration parameters) {
        super.configure(parameters);
        if (!this.skipWrongSchemaFileSplit) {
            this.skipWrongSchemaFileSplit = parameters.getBoolean(PARQUET_SKIP_WRONG_SCHEMA_SPLITS, false);
        }
        // Fixed: the guard used to be "if (skipCorruptedRecord)", which can never be true for a
        // flag that starts out false, so the configured value was silently ignored.
        if (!this.skipCorruptedRecord) {
            this.skipCorruptedRecord = parameters.getBoolean(PARQUET_SKIP_CORRUPTED_RECORD, false);
        }
    }

    /**
     * Restricts reading to the given fields. Field types are resolved immediately when the
     * expected schema is available; otherwise they are resolved against the file schema in
     * {@link #open}.
     *
     * @param fieldNames names of the fields to read; must not be {@code null}
     * @throws IllegalArgumentException if a field type cannot be resolved from the expected schema
     */
    public void selectFields(String[] fieldNames) {
        Preconditions.checkNotNull(fieldNames, "fieldNames");
        this.fieldNames = fieldNames;
        if (this.expectedFileSchema != null) {
            this.fieldTypes = this.getFieldTypesFromSchema(fieldNames, this.expectedFileSchema);
        }
    }

    /**
     * Looks up the type of every requested field in the given schema.
     *
     * @return array of field types in the same order as {@code fieldNames}
     * @throws IllegalArgumentException if the lookup of a field fails with an
     *     {@link IndexOutOfBoundsException}
     */
    private TypeInformation[] getFieldTypesFromSchema(String[] fieldNames, MessageType schema) {
        RowTypeInfo rowTypeInfo = (RowTypeInfo) ParquetSchemaConverter.fromParquetType(schema);
        TypeInformation[] selectFieldTypes = new TypeInformation[fieldNames.length];
        for (int i = 0; i < fieldNames.length; ++i) {
            try {
                selectFieldTypes[i] = rowTypeInfo.getTypeAt(fieldNames[i]);
            } catch (IndexOutOfBoundsException e) {
                throw new IllegalArgumentException(String.format("Fail to access Field %s , which is not contained in the file schema", fieldNames[i]), e);
            }
        }
        return selectFieldTypes;
    }

    /**
     * Sets the predicate used to build the record reader's filter in {@link #open}; when
     * {@code null}, {@link FilterCompat#NOOP} is used.
     */
    public void setFilterPredicate(FilterPredicate filterPredicate) {
        this.filterPredicate = filterPredicate;
    }

    /** Returns the current read position reported by the underlying record reader. */
    public Tuple2<Long, Long> getCurrentState() {
        return this.parquetRecordReader.getCurrentReadPosition();
    }

    /**
     * Opens the given split: resolves field names/types if necessary, derives the read schema
     * and, unless the split has to be skipped because of a schema mismatch, creates and
     * initializes the record reader.
     *
     * @param split the split to open
     * @throws IOException if the file cannot be opened or the reader cannot be initialized
     * @throws IllegalArgumentException on a schema mismatch when skipping such splits is disabled
     */
    public void open(FileInputSplit split) throws IOException {
        this.skipThisSplit = false;
        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
        InputFile inputFile = HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(split.getPath().toUri()), configuration);
        ParquetReadOptions options = ParquetReadOptions.builder().build();
        ParquetFileReader fileReader = new ParquetFileReader(inputFile, options);
        // Until the file reader is passed to the record reader nobody else will close it, so a
        // skipped split or a schema error must release it here to avoid leaking the file handle.
        boolean fileReaderHandedOff = false;
        try {
            MessageType fileSchema = fileReader.getFileMetaData().getSchema();
            if (this.expectedFileSchema == null) {
                if (this.fieldNames == null) {
                    RowTypeInfo rowTypeInfo = (RowTypeInfo) ParquetSchemaConverter.fromParquetType(fileSchema);
                    this.fieldNames = rowTypeInfo.getFieldNames();
                    this.fieldTypes = rowTypeInfo.getFieldTypes();
                } else {
                    this.fieldTypes = this.getFieldTypesFromSchema(this.fieldNames, fileSchema);
                }
            }
            MessageType readSchema = this.getReadSchema(fileSchema, split.getPath());
            if (this.skipThisSplit) {
                LOG.warn("Escaped the file split [{}] due to mismatch of file schema to expected result schema", split.getPath());
                return;
            }
            this.parquetRecordReader = new ParquetRecordReader<Row>(new RowReadSupport(), readSchema, this.filterPredicate == null ? FilterCompat.NOOP : FilterCompat.get(this.filterPredicate));
            fileReaderHandedOff = true;
            this.parquetRecordReader.initialize(fileReader, configuration);
            this.parquetRecordReader.setSkipCorruptedRecord(this.skipCorruptedRecord);
            if (this.recordConsumed == null) {
                this.recordConsumed = this.getRuntimeContext().getMetricGroup().counter("parquet-records-consumed");
            }
            LOG.debug("Open ParquetInputFormat with FileInputSplit [{}]", split.getPath());
        } finally {
            if (!fileReaderHandedOff) {
                fileReader.close();
            }
        }
    }

    /**
     * Re-opens the split and moves the record reader to the checkpointed position.
     *
     * @param split the split to reopen; must not be {@code null}
     * @param state position previously obtained from {@link #getCurrentState()}; must not be {@code null}
     * @throws IOException if opening the split or seeking fails
     */
    public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
        Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
        Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");
        this.open(split);
        if (this.skipThisSplit) {
            // No record reader was created for this split (reachedEnd() reports true right
            // away); seeking would hit a null or stale reader.
            return;
        }
        this.parquetRecordReader.seek(state.f0, state.f1);
    }

    /** Returns the names of the fields being read, or {@code null} if not resolved yet. */
    protected String[] getFieldNames() {
        return this.fieldNames;
    }

    /** Returns the types of the fields being read, or {@code null} if not resolved yet. */
    protected TypeInformation[] getFieldTypes() {
        return this.fieldTypes;
    }

    /** Returns the filter predicate set through {@link #setFilterPredicate}, possibly {@code null}. */
    @VisibleForTesting
    protected FilterPredicate getPredicate() {
        return this.filterPredicate;
    }

    /** Closes the record reader if one has been created. */
    public void close() throws IOException {
        if (this.parquetRecordReader != null) {
            this.parquetRecordReader.close();
        }
    }

    /**
     * Reports whether the current split is exhausted. A skipped split is always reported as
     * exhausted.
     */
    public boolean reachedEnd() throws IOException {
        if (this.skipThisSplit) {
            return true;
        }
        return this.parquetRecordReader.reachEnd();
    }

    /**
     * Reads and converts the next record, incrementing the consumed-records counter.
     *
     * @param e reuse object; not used by this implementation
     * @return the converted record, or {@code null} once the end of the split is reached
     */
    public E nextRecord(E e) throws IOException {
        if (this.reachedEnd()) {
            return null;
        }
        this.recordConsumed.inc();
        return this.convert(this.parquetRecordReader.nextRecord());
    }

    /**
     * Converts a row read from the Parquet file into the element type of this format.
     *
     * @param var1 the row produced by the record reader
     * @return the converted record
     */
    protected abstract E convert(Row var1);

    /**
     * Builds the schema used for reading by picking the requested fields out of the file schema.
     *
     * <p>If a requested field is missing from the file, or its type differs from the requested
     * type, the method either throws or, when skipping wrong-schema splits is enabled, marks the
     * split as skipped and returns the unmodified file schema.
     *
     * @param fileSchema schema stored in the Parquet file
     * @param filePath path of the file, used in error messages only
     * @return message type containing the requested fields in the requested order
     * @throws IllegalArgumentException on a mismatch when skipping is disabled
     */
    private MessageType getReadSchema(MessageType fileSchema, Path filePath) {
        RowTypeInfo fileTypeInfo = (RowTypeInfo) ParquetSchemaConverter.fromParquetType(fileSchema);
        ArrayList<Type> types = new ArrayList<Type>();
        for (int i = 0; i < this.fieldNames.length; ++i) {
            String readFieldName = this.fieldNames[i];
            TypeInformation readFieldType = this.fieldTypes[i];
            if (fileTypeInfo.getFieldIndex(readFieldName) < 0) {
                if (!this.skipWrongSchemaFileSplit) {
                    throw new IllegalArgumentException("Field " + readFieldName + " cannot be found in schema of  Parquet file: " + filePath + ".");
                }
                this.skipThisSplit = true;
                return fileSchema;
            }
            if (!readFieldType.equals(fileTypeInfo.getTypeAt(readFieldName))) {
                if (!this.skipWrongSchemaFileSplit) {
                    throw new IllegalArgumentException("Expecting type " + readFieldType + " for field " + readFieldName + " but found type " + fileTypeInfo.getTypeAt(readFieldName) + " in Parquet file: " + filePath + ".");
                }
                this.skipThisSplit = true;
                return fileSchema;
            }
            types.add(fileSchema.getType(readFieldName));
        }
        return new MessageType(fileSchema.getName(), types);
    }
}

