/*
 * Decompiled with CFR 0.152.
 */
package com.marklogic.contentpump;

import com.marklogic.contentpump.ContentWithFileNameWritable;
import com.marklogic.contentpump.ImportRecordReader;
import com.marklogic.contentpump.utilities.CSVParserFormatter;
import com.marklogic.contentpump.utilities.DocBuilder;
import com.marklogic.contentpump.utilities.EncodingUtil;
import com.marklogic.contentpump.utilities.FileIterator;
import com.marklogic.contentpump.utilities.IdGenerator;
import com.marklogic.contentpump.utilities.JSONDocBuilder;
import com.marklogic.contentpump.utilities.XMLDocBuilder;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Iterator;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Record reader that parses delimited text (CSV-like) input with Apache
 * Commons CSV and turns every data record into one document built by a
 * {@link DocBuilder} (XML or JSON, depending on configuration).
 *
 * <p>The first record of a file is treated as the header row: its values
 * become the field names used for every following record. The document URI
 * is either generated by an {@link IdGenerator} or taken from one column of
 * the record.
 *
 * @param <VALUEIN> value type produced by this reader
 */
public class DelimitedTextReader<VALUEIN> extends ImportRecordReader<VALUEIN> {
    public static final Log LOG = LogFactory.getLog(DelimitedTextReader.class);
    /** Character used to quote (encapsulate) field values. */
    public static final char encapsulator = '\"';
    /** Message fragment used to recognise the parser's malformed-quoting error. */
    protected static final String INVALID_TOKEN_DELIMITER_ERROR = "Invalid character between encapsulated token and delimiter";
    /** Header (column names); {@code null} until the first record has been read. */
    protected String[] fields;
    /** Single-character field delimiter read from the configuration. */
    protected char delimiter;
    protected CSVParser parser;
    protected InputStreamReader instream;
    protected FSDataInputStream fileIn;
    protected boolean hasNext = true;
    /** Name of the column holding the document URI, or {@code null} if not configured. */
    protected String uriName;
    /** Length of the current split as reported by {@link InputSplit#getLength()}. */
    protected long fileLen = Long.MAX_VALUE;
    protected long bytesRead;
    /** Whether document URIs are generated rather than read from a column. */
    protected boolean generateId;
    protected IdGenerator idGen;
    /** Index of the URI column within {@link #fields}; -1 while unresolved. */
    protected int uriId = -1;
    // Not assigned in this class; presumably set by subclasses that read
    // compressed input -- confirm against the subclasses.
    protected boolean compressed;
    protected DocBuilder docBuilder;
    protected Iterator<CSVRecord> parserIterator;
    /** Line number reported for the previously returned record. */
    private int prevLineNumber = 1;

    /**
     * Closes the character stream of the file currently being read, if any.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (this.instream != null) {
            this.instream.close();
        }
    }

    /**
     * Returns the fraction of the current input that has been consumed.
     *
     * @return a value between 0.0 and 1.0; 1.0 when the input length is not positive
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        // A zero-length input has nothing left to read; also avoids dividing by zero.
        if (this.fileLen <= 0L) {
            return 1.0f;
        }
        // Divide in floating point: long / long would truncate to 0 or 1.
        return Math.min(1.0f, (float) this.bytesRead / (float) this.fileLen);
    }

    /**
     * Reads the configuration, resolves the input path and opens a parser on
     * the split. When the split's path is a directory, a {@link FileIterator}
     * is created and the first split it yields is opened instead.
     *
     * @param inSplit split to read; cast to {@link FileSplit}
     * @param context task context supplying the configuration
     */
    @Override
    public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException {
        this.initConfig(context);
        // initDocType() must run before initDelimConf(), which uses docBuilder.
        this.initDocType();
        this.initDelimConf();
        this.setFile(((FileSplit) inSplit).getPath());
        this.fs = this.file.getFileSystem(context.getConfiguration());
        FileStatus status = this.fs.getFileStatus(this.file);
        if (status.isDirectory()) {
            this.iterator = new FileIterator((FileSplit) inSplit, context);
            inSplit = (InputSplit) this.iterator.next();
        }
        this.initParser(inSplit);
    }

    /**
     * Opens the file behind {@code inSplit} and creates a CSV parser over it.
     * Returns without touching the parser state when {@code openFile} yields
     * {@code null}.
     *
     * @param inSplit split to open; cast to {@link FileSplit} when URIs are generated
     */
    protected void initParser(InputSplit inSplit) throws IOException, InterruptedException {
        this.fileIn = this.openFile(inSplit, true);
        if (this.fileIn == null) {
            return;
        }
        this.instream = new InputStreamReader((InputStream) this.fileIn, this.encoding);
        this.bytesRead = 0L;
        this.fileLen = inSplit.getLength();
        if (this.uriName == null) {
            // No URI column configured: either generate ids or fall back to column 0.
            this.generateId = this.conf.getBoolean("mapreduce.marklogic.input.generateuri", false);
            if (this.generateId) {
                this.idGen = new IdGenerator(this.file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
            } else {
                this.uriId = 0;
            }
        }
        this.parser = new CSVParser((Reader) this.instream,
                CSVParserFormatter.getFormat(this.delimiter, Character.valueOf(encapsulator), true, true));
        this.parserIterator = this.parser.iterator();
    }

    /**
     * Reads the delimiter and URI column name from the configuration and
     * initialises the document builder.
     *
     * @throws UnsupportedOperationException if the configured delimiter is not exactly one character
     */
    protected void initDelimConf() {
        String delimStr = this.conf.get("mapreduce.marklogic.delimited.delimiter", ",");
        if (delimStr.length() != 1) {
            throw new UnsupportedOperationException("Invalid delimiter: " + delimStr);
        }
        this.delimiter = delimStr.charAt(0);
        this.uriName = this.conf.get("mapreduce.marklogic.input.uriid", null);
        this.docBuilder.init(this.conf);
    }

    /**
     * Reads the next record from the parser and returns its values.
     *
     * @return the field values of the next record
     */
    protected String[] getLine() throws IOException {
        return this.getLine(this.getRecordLine());
    }

    /**
     * Copies the values of a parsed record into an array.
     *
     * @param record parsed record
     * @return the record's values, in column order
     * @throws IOException if the record yields fewer values than {@code record.size()} reports
     */
    protected String[] getLine(CSVRecord record) throws IOException {
        Iterator<String> recordIterator = record.iterator();
        int recordSize = record.size();
        String[] values = new String[recordSize];
        for (int i = 0; i < recordSize; ++i) {
            if (!recordIterator.hasNext()) {
                throw new IOException("Record size doesn't match the real size");
            }
            values[i] = recordIterator.next();
        }
        return values;
    }

    /**
     * Returns the next record from the parser iterator.
     *
     * @return the next parsed record
     */
    protected CSVRecord getRecordLine() {
        return this.parserIterator.next();
    }

    /**
     * Advances to the next record and builds its key and document value.
     *
     * <p>The first record read is consumed as the header. Records whose
     * column count differs from the header, or whose values the document
     * builder rejects, are reported through {@code setSkipKey} and still
     * return {@code true}.
     *
     * @return {@code true} if a key/value (possibly a skip key) was produced,
     *         {@code false} when the input is exhausted or the file was skipped
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (this.parser == null || this.parserIterator == null) {
            return false;
        }
        try {
            if (!this.parserIterator.hasNext()) {
                return this.advanceToNextInputFile() && this.nextKeyValue();
            }
            String[] values = this.getLine();
            if (this.fields == null) {
                // First record of the input: treat it as the header row.
                if (!this.configureHeader(values)) {
                    return false;
                }
                if (!this.parserIterator.hasNext()) {
                    return this.advanceToNextInputFile() && this.nextKeyValue();
                }
                values = this.getLine();
            }
            int line = (int) this.parser.getCurrentLineNumber();
            // If the parser reports the same line as for the previous record,
            // bump it so consecutive records get distinct line numbers.
            if (line == this.prevLineNumber) {
                ++line;
            }
            this.prevLineNumber = line;
            if (values.length != this.fields.length) {
                this.setSkipKey(line, 0, "number of fields does not match number of columns");
                return true;
            }
            this.docBuilder.newDoc();
            for (int i = 0; i < this.fields.length; ++i) {
                // Columns with an empty header name are not emitted.
                if (this.fields[i].equals("")) {
                    continue;
                }
                // A true result from setKey ends processing of this record;
                // presumably the key was rejected -- confirm in ImportRecordReader.
                if (!this.generateId && this.uriId == i && this.setKey(values[i], line, 0, true)) {
                    return true;
                }
                try {
                    this.docBuilder.put(this.fields[i], values[i]);
                } catch (Exception e) {
                    this.setSkipKey(line, 0, e.getMessage());
                    return true;
                }
            }
            this.docBuilder.build();
            if (this.generateId && this.setKey(this.idGen.incrementAndGet(), line, 0, true)) {
                return true;
            }
            if (this.value instanceof Text) {
                ((Text) this.value).set(this.docBuilder.getDoc());
            } else {
                ((Text) ((ContentWithFileNameWritable) this.value).getValue()).set(this.docBuilder.getDoc());
            }
        } catch (RuntimeException ex) {
            String message = ex.getMessage();
            if (message == null || !message.contains(INVALID_TOKEN_DELIMITER_ERROR)) {
                throw ex;
            }
            // Malformed quoting: mark the record as skipped, step past it and
            // keep reading instead of failing after the skip key was set.
            this.setSkipKey((int) this.parser.getCurrentLineNumber(), 0, INVALID_TOKEN_DELIMITER_ERROR);
            if (this.parserIterator.hasNext()) {
                this.parserIterator.next();
            }
            return true;
        }
        return true;
    }

    /**
     * Handles exhaustion of the current parser: opens the next file when the
     * input is not compressed and the file iterator has more entries,
     * otherwise marks the input as fully read.
     *
     * @return {@code true} if a parser was initialised on a further file,
     *         {@code false} if there is nothing more to read
     */
    private boolean advanceToNextInputFile() throws IOException, InterruptedException {
        if (!this.compressed && this.iterator != null && this.iterator.hasNext()) {
            this.close();
            this.initParser((InputSplit) this.iterator.next());
            return true;
        }
        this.bytesRead = this.fileLen;
        return false;
    }

    /**
     * Installs {@code header} as the field names, resolves the URI column and
     * configures the document builder. On failure the error is logged and the
     * parser is cleared so that later calls return no records.
     *
     * @param header values of the first record
     * @return {@code true} if the header is usable, {@code false} if the file must be skipped
     */
    private boolean configureHeader(String[] header) throws IOException, InterruptedException {
        this.fields = header;
        if (Charset.defaultCharset().equals(Charset.forName("UTF-8"))) {
            EncodingUtil.handleBOMUTF8(this.fields, 0);
        }
        // Nothing to look up when ids are generated or column 0 was preselected.
        boolean found = this.generateId || this.uriId == 0;
        for (int i = 0; i < this.fields.length && !found; ++i) {
            if (this.fields[i].equals(this.uriName)) {
                this.uriId = i;
                found = true;
            }
        }
        if (!found) {
            LOG.error("Skipped file: " + this.file.toUri() + ", reason: uri_id " + this.uriName + " is not found");
            this.parser = null;
            return false;
        }
        try {
            this.docBuilder.configFields(this.conf, this.fields);
        } catch (IllegalArgumentException e) {
            LOG.error("Skipped file: " + this.file.toUri() + ", reason: " + e.getMessage());
            this.parser = null;
            return false;
        }
        return true;
    }

    /**
     * Joins values into a single line separated by the configured delimiter.
     *
     * @param values values to join; {@code null} elements are rendered as {@code "null"}
     * @return the joined line, or an empty string when {@code values} is empty
     */
    protected String convertToLine(String[] values) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < values.length; ++i) {
            if (i > 0) {
                sb.append(this.delimiter);
            }
            sb.append(values[i]);
        }
        return sb.toString();
    }

    /**
     * Selects the document builder from the configured output content type:
     * XML when the type is {@code "XML"} (the default), JSON otherwise.
     */
    protected void initDocType() {
        String docType = this.conf.get("mapreduce.marklogic.output.content.type", "XML");
        this.docBuilder = "XML".equals(docType) ? new XMLDocBuilder() : new JSONDocBuilder();
    }
}

