package com.aliyun.odps.udf.example.text;

import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.data.ArrayRecord;
import com.aliyun.odps.data.Binary;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.io.InputStreamSet;
import com.aliyun.odps.io.SourceInputStream;
import com.aliyun.odps.udf.DataAttributes;
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.Extractor;
import com.aliyun.odps.utils.StringUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.math.BigDecimal;
import java.sql.Date;
import java.util.ArrayList;
import java.util.zip.GZIPInputStream;
import org.apache.commons.lang.StringEscapeUtils;

/* loaded from: input_file:com/aliyun/odps/udf/example/text/TextExtractor.class */
public class TextExtractor extends Extractor {
    // Set of input streams handed to setup(); moveToNextStream() pulls streams from it via next().
    private InputStreamSet inputs;
    // Column delimiter; setup() uses ',' when the "delimiter" attribute is absent.
    private char delimiterChar;
    // Data attributes passed to setup(); source of options and schema information.
    private DataAttributes attributes;
    // Reader over the stream currently being consumed; null when no stream is open.
    private Reader currentReader;
    // Columns of the output record, from attributes.getRecordColumns().
    private Column[] outputColumns;
    // Columns of the full table schema, from attributes.getFullTableColumns().
    private Column[] fullSchemaColumns;
    // Reused buffer of field texts, one slot per full-schema column; filled through setLinePart().
    private String[] lineParts;
    // Types of outputColumns, cached by setup().
    private OdpsType[] outputTypes;
    // Reused output record; textLineToRecord() overwrites and returns this same instance.
    private ArrayRecord record;
    // From attributes.getNeededIndexes(); compared against positions in the parsed line by textLineToRecord().
    private int[] outputIndexes;
    // Execution context passed to setup(); handleMismatchLine() uses it to obtain counters.
    private ExecutionContext ctx;
    // True until the first readNextLine() call, which opens the first stream.
    private boolean firstRead = true;
    // When true, handleMismatchLine() throws on a schema mismatch; otherwise it prints to stderr.
    private boolean strict = true;
    // When true, parseLine() discards carriage-return characters (code 13).
    private boolean ignoreLineFeed = true;
    // When true, parseLine() treats '"' (code 34) as a quote character.
    private boolean handleQuote = true;
    // Set by setup() when getNeededIndexes() returns null or an empty array.
    private boolean allColumnsPruned = false;
    // Record with no columns, returned by extract() when allColumnsPruned is set.
    private final ArrayRecord emptyRecord = new ArrayRecord(new Column[0]);
    // NOTE(review): not referenced anywhere in the visible code.
    private final ArrayList<String> emptyList = new ArrayList<>(0);
    // Line terminator; '\n' unless overridden by the "line.terminator" attribute.
    private char linebreakChar = '\n';
    // When true, readNextLine() uses parseLine() instead of readLine() + split;
    // moveToNextStream() also forces this to true when a stream is a partial split.
    private boolean complexText = false;
    // When true, whole-file streams are wrapped in a GZIPInputStream by moveToNextStream().
    private boolean isGzip = false;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: com.aliyun.odps.udf.example.text.TextExtractor$1, reason: invalid class name */
    /* loaded from: input_file:com/aliyun/odps/udf/example/text/TextExtractor$1.class */
    /*
     * Lookup table that maps OdpsType.ordinal() to a small integer case label.
     * The numbers assigned here (1..15) are the case labels of the switch in
     * textLineToRecord(), so the two must be changed together.
     *
     * Each assignment is wrapped in its own try/catch of NoSuchFieldError; when
     * that error is caught the corresponding slot keeps the int[] default of 0,
     * which matches no explicit case label and therefore reaches the switch's
     * default branch.
     */
    public static /* synthetic */ class AnonymousClass1 {
        // Indexed by OdpsType.ordinal(); value is the case label, 0 when unmapped.
        static final /* synthetic */ int[] $SwitchMap$com$aliyun$odps$OdpsType = new int[OdpsType.values().length];

        static {
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.STRING.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.BIGINT.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.BOOLEAN.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.DOUBLE.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.FLOAT.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.BINARY.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.DATETIME.ordinal()] = 7;
            } catch (NoSuchFieldError e7) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.DECIMAL.ordinal()] = 8;
            } catch (NoSuchFieldError e8) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.TINYINT.ordinal()] = 9;
            } catch (NoSuchFieldError e9) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.INT.ordinal()] = 10;
            } catch (NoSuchFieldError e10) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.SMALLINT.ordinal()] = 11;
            } catch (NoSuchFieldError e11) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.CHAR.ordinal()] = 12;
            } catch (NoSuchFieldError e12) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.VARCHAR.ordinal()] = 13;
            } catch (NoSuchFieldError e13) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.ARRAY.ordinal()] = 14;
            } catch (NoSuchFieldError e14) {
            }
            try {
                $SwitchMap$com$aliyun$odps$OdpsType[OdpsType.MAP.ordinal()] = 15;
            } catch (NoSuchFieldError e15) {
            }
        }
    }

    /**
     * Initialises the extractor from the supplied context, input streams and attributes.
     *
     * <p>Reads the text options ("delimiter", "line.terminator" and the
     * "odps.text.option.*" flags), caches the output and full-table schemas, and
     * allocates the reusable line buffer and output record.
     *
     * @param executionContext context later used to obtain counters
     * @param inputStreamSet   streams to read text from
     * @param dataAttributes   options and schema information
     * @throws IllegalArgumentException if the delimiter is not exactly one character, the line
     *         terminator is longer than one character, or the number of needed indexes differs
     *         from the number of output columns
     */
    public void setup(ExecutionContext executionContext, InputStreamSet inputStreamSet, DataAttributes dataAttributes) {
        this.inputs = inputStreamSet;
        this.attributes = dataAttributes;
        this.ctx = executionContext;

        // Column delimiter: a missing attribute means ','; anything but a single char is rejected.
        final String delimiterOption = this.attributes.getValueByKey("delimiter");
        if (delimiterOption != null && delimiterOption.length() != 1) {
            throw new IllegalArgumentException("column delimiter cannot be more than one character, sees: " + delimiterOption);
        }
        this.delimiterChar = (delimiterOption == null) ? ',' : delimiterOption.charAt(0);

        // Line terminator: a missing or empty attribute keeps the current value.
        final String terminatorOption = dataAttributes.getValueByKey("line.terminator");
        if (terminatorOption != null && !terminatorOption.isEmpty()) {
            if (terminatorOption.length() > 1) {
                throw new IllegalArgumentException("line terminator cannot be more than one character, sees: " + terminatorOption);
            }
            this.linebreakChar = terminatorOption.charAt(0);
        }

        // These two flags can only be switched on here, never off.
        final String complexOption = this.attributes.getValueByKey("odps.text.option.complex.text.enabled");
        if (complexOption != null && complexOption.toLowerCase().equals("true")) {
            this.complexText = true;
        }
        final String gzipOption = this.attributes.getValueByKey("odps.text.option.gzip.input.enabled");
        if (gzipOption != null && gzipOption.toLowerCase().equals("true")) {
            this.isGzip = true;
        }

        // These three flags are overwritten only when the attribute is present and non-empty.
        final String strictOption = dataAttributes.getValueByKey("odps.text.option.strict.mode");
        if (!StringUtils.isNullOrEmpty(strictOption)) {
            this.strict = Boolean.parseBoolean(strictOption);
        }
        final String lineFeedOption = dataAttributes.getValueByKey("odps.text.option.ignore.line.feed");
        if (!StringUtils.isNullOrEmpty(lineFeedOption)) {
            this.ignoreLineFeed = Boolean.parseBoolean(lineFeedOption);
        }
        final String quoteOption = dataAttributes.getValueByKey("odps.text.option.quote.enable");
        if (!StringUtils.isNullOrEmpty(quoteOption)) {
            this.handleQuote = Boolean.parseBoolean(quoteOption);
        }

        final String summary = "TextExtractor set up with delimiter [" + this.delimiterChar
                + "],  line terminator [" + this.linebreakChar
                + "], with complex text flag set to " + this.complexText
                + " and reading gzip file set to " + this.isGzip;
        System.out.println(StringEscapeUtils.escapeJava(summary));

        this.outputColumns = this.attributes.getRecordColumns();
        final int outputCount = this.outputColumns.length;
        this.outputTypes = new OdpsType[outputCount];
        for (int col = 0; col < outputCount; col++) {
            this.outputTypes[col] = this.outputColumns[col].getType();
        }

        this.fullSchemaColumns = this.attributes.getFullTableColumns();
        this.lineParts = new String[this.fullSchemaColumns.length];
        this.record = new ArrayRecord(this.outputColumns);

        this.outputIndexes = this.attributes.getNeededIndexes();
        if (this.outputIndexes == null || this.outputIndexes.length == 0) {
            this.allColumnsPruned = true;
        }
        if (this.allColumnsPruned) {
            return;
        }
        if (this.outputIndexes.length != this.outputColumns.length) {
            throw new IllegalArgumentException("Mismatched output schema: Expecting " + this.outputColumns.length + " columns but get " + this.outputIndexes.length);
        }
    }

    /**
     * Produces the next record from the input.
     *
     * <p>Lines that parse to a zero-length array are skipped. When every column has
     * been pruned, the shared empty record is returned for each line read,
     * including zero-length ones.
     *
     * @return the next record, or {@code null} when the input is exhausted
     * @throws IOException if reading from the underlying stream fails
     */
    public Record extract() throws IOException {
        while (true) {
            final String[] fields = readNextLine();
            if (fields == null) {
                return null;
            }
            if (this.allColumnsPruned) {
                return this.emptyRecord;
            }
            if (fields.length != 0) {
                return textLineToRecord(fields);
            }
            // Zero-length result: blank line, try the next one.
        }
    }

    /**
     * Releases the reader over the stream that is currently being consumed, if any.
     *
     * <p>Closing the reader also closes whatever it wraps, which for gzip input
     * includes the {@link GZIPInputStream} created by this class. A failure while
     * closing is reported on stderr rather than thrown, because this method does
     * not declare a checked exception.
     */
    public void close() {
        final Reader reader = this.currentReader;
        // Clear the field first so a repeated close() is a no-op.
        this.currentReader = null;
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            System.err.println("TextExtractor failed to close input reader: " + e);
        }
    }

    /**
     * Converts the fields of one text line into the reusable output record.
     *
     * <p>The fields are scanned in order; whenever the field position equals the
     * next entry of {@code outputIndexes}, the text is converted according to the
     * matching output type and stored. The literal text {@code NULL} (or a null
     * field) is stored as a null value.
     *
     * <p>Because the record instance is reused between calls, any needed column
     * that the line does not supply (for example a line with fewer fields than the
     * schema) is explicitly set to null, so it cannot keep the previous row's value.
     *
     * @param strArr the field texts of the current line
     * @return the shared record instance, populated for this line
     * @throws IllegalArgumentException if an output column has an unsupported type
     *         (CHAR, VARCHAR, ARRAY, MAP or any unmapped type) or a value cannot be parsed
     * @throws IOException declared for callers; not thrown by the visible code
     */
    private Record textLineToRecord(String[] strArr) throws IllegalArgumentException, IOException {
        if (this.outputColumns.length == 0) {
            return this.record;
        }
        int outIdx = 0;
        for (int srcIdx = 0; srcIdx < strArr.length && outIdx < this.outputIndexes.length; srcIdx++) {
            if (srcIdx != this.outputIndexes[outIdx]) {
                continue;
            }
            final String text = strArr[srcIdx];
            if (text == null || "NULL".equals(text)) {
                this.record.set(outIdx, (Object) null);
            } else {
                // Case labels are the values assigned in AnonymousClass1's static initialiser.
                switch (AnonymousClass1.$SwitchMap$com$aliyun$odps$OdpsType[this.outputTypes[outIdx].ordinal()]) {
                    case 1: // STRING
                        this.record.set(outIdx, text);
                        break;
                    case 2: // BIGINT
                        this.record.setBigint(outIdx, Long.valueOf(Long.parseLong(text)));
                        break;
                    case 3: // BOOLEAN
                        this.record.setBoolean(outIdx, Boolean.valueOf(Boolean.parseBoolean(text)));
                        break;
                    case 4: // DOUBLE
                        this.record.setDouble(outIdx, Double.valueOf(Double.parseDouble(text)));
                        break;
                    case 5: // FLOAT
                        this.record.setFloat(outIdx, Float.valueOf(Float.parseFloat(text)));
                        break;
                    case 6: // BINARY
                        // NOTE(review): encodes with the platform default charset — confirm intended.
                        this.record.setBinary(outIdx, new Binary(text.getBytes()));
                        break;
                    case 7: // DATETIME
                        this.record.setDate(outIdx, Date.valueOf(text));
                        break;
                    case 8: // DECIMAL
                        this.record.setDecimal(outIdx, new BigDecimal(text));
                        break;
                    case 9:  // TINYINT
                    case 10: // INT
                    case 11: // SMALLINT
                        this.record.setInt(outIdx, Integer.valueOf(Integer.parseInt(text)));
                        break;
                    default: // CHAR, VARCHAR, ARRAY, MAP and anything unmapped
                        throw new IllegalArgumentException("Type " + this.outputTypes[outIdx] + " not supported for now.");
                }
            }
            outIdx++;
        }
        // Clear needed columns the line did not reach; the record is shared across rows.
        for (; outIdx < this.outputIndexes.length; outIdx++) {
            this.record.set(outIdx, (Object) null);
        }
        return this.record;
    }

    /**
     * Reads one line from {@code reader}, character by character, and splits it into fields.
     *
     * <p>Behaviour when columns are needed:
     * <ul>
     *   <li>Carriage returns are discarded when {@code ignoreLineFeed} is set.</li>
     *   <li>When {@code handleQuote} is set, a double quote opens a quoted section in
     *       which delimiters and line terminators are literal; a quote that directly
     *       follows a quoted section of the same field yields one literal quote.</li>
     *   <li>The line ends at the line terminator or at end of input.</li>
     *   <li>A line that contributed no field content at all is reported as a
     *       zero-length array so that the caller can skip it.</li>
     *   <li>If the number of fields differs from the full schema,
     *       {@code handleMismatchLine} is invoked.</li>
     * </ul>
     *
     * <p>When all columns are pruned the line is only skipped: a zero-length array is
     * returned for every line, including a final line that has no terminator.
     *
     * @param reader source of characters
     * @return the shared {@code lineParts} buffer, a zero-length array for a blank or
     *         skipped line, or {@code null} when no more input is available
     * @throws IOException if reading fails
     */
    public String[] parseLine(Reader reader) throws IOException {
        int ch = reader.read();

        if (this.allColumnsPruned) {
            // Only line boundaries matter; remember whether anything was consumed so
            // that a last line without a terminator is still reported.
            boolean sawContent = false;
            while (ch != this.linebreakChar) {
                if (ch < 0) {
                    return sawContent ? new String[0] : null;
                }
                if (!(ch == '\r' && this.ignoreLineFeed)) {
                    sawContent = true;
                }
                ch = reader.read();
            }
            return new String[0];
        }

        if (this.ignoreLineFeed) {
            while (ch == '\r') {
                ch = reader.read();
            }
        }
        if (ch < 0) {
            return null;
        }

        boolean lineEmpty = true;    // no field content appended yet
        boolean inQuotes = false;    // currently inside a quoted section
        boolean quotedSeen = false;  // current field already contained a quoted section
        int fieldIndex = 0;
        final StringBuilder field = new StringBuilder();

        while (ch >= 0) {
            if (inQuotes) {
                quotedSeen = true;
                if (ch == '"') {
                    inQuotes = false;
                } else {
                    field.append((char) ch);
                    lineEmpty = false;
                }
            } else if (ch == '"' && this.handleQuote) {
                inQuotes = true;
                if (quotedSeen) {
                    // Doubled quote inside a quoted value: emit one literal quote.
                    field.append('"');
                    lineEmpty = false;
                }
            } else if (ch == this.delimiterChar) {
                setLinePart(fieldIndex++, field.toString());
                field.setLength(0);
                quotedSeen = false;
            } else if (ch == '\r' && this.ignoreLineFeed) {
                // Carriage return outside quotes is dropped.
            } else if (ch == this.linebreakChar) {
                if (lineEmpty) {
                    return new String[0];
                }
                break; // end of the current line
            } else {
                field.append((char) ch);
                lineEmpty = false;
            }
            ch = reader.read();
        }

        setLinePart(fieldIndex, field.toString());
        final int parsedColumns = fieldIndex + 1;
        if (parsedColumns != this.fullSchemaColumns.length) {
            handleMismatchLine(parsedColumns);
        }
        return this.lineParts;
    }

    /**
     * Stores one parsed field in the line buffer, or reports a schema mismatch
     * when the position lies beyond the full schema.
     *
     * @param i   zero-based field position
     * @param str field text
     */
    private void setLinePart(int i, String str) {
        final boolean withinSchema = i < this.fullSchemaColumns.length;
        if (withinSchema) {
            this.lineParts[i] = str;
            return;
        }
        handleMismatchLine(i);
    }

    /**
     * Records and reports a line whose field count does not match the full schema.
     *
     * <p>When {@code i} is below the schema width, the "schema.partial" counter is
     * incremented and the remaining buffer slots are filled with the text
     * {@code NULL}; otherwise the "schema.oversize" counter is incremented. The
     * escaped message is then thrown in strict mode or printed to stderr.
     *
     * @param i the column count (or field position) reported by the caller
     * @throws RuntimeException in strict mode
     */
    private void handleMismatchLine(int i) {
        final boolean tooFew = i < this.fullSchemaColumns.length;
        if (tooFew) {
            this.ctx.getCounter("text.parse", "schema.partial").increment(1L);
            int slot = i;
            while (slot < this.fullSchemaColumns.length) {
                this.lineParts[slot] = "NULL";
                slot++;
            }
        } else {
            this.ctx.getCounter("text.parse", "schema.oversize").increment(1L);
        }

        final StringBuilder raw = new StringBuilder();
        raw.append("SCHEMA MISMATCH: External Table schema specified a total of [");
        raw.append(this.fullSchemaColumns.length);
        raw.append("] columns, but current text line parsed into [");
        raw.append(i);
        raw.append("] columns delimited by [");
        raw.append(this.delimiterChar);
        raw.append("]. Current line is read as: ");
        raw.append(StringUtils.join(this.lineParts, this.delimiterChar));
        final String message = StringEscapeUtils.escapeJava(raw.toString());

        if (!this.strict) {
            System.err.println(message);
            return;
        }
        throw new RuntimeException(message);
    }

    /**
     * Returns the fields of the next available line, advancing to the following
     * input stream whenever the current one is exhausted.
     *
     * <p>In complex-text mode the line is parsed by {@code parseLine}; otherwise it
     * is read with {@code BufferedReader.readLine()} and split on the delimiter.
     *
     * @return the fields of the next line, or {@code null} when no stream has data left
     * @throws IOException if reading fails
     */
    private String[] readNextLine() throws IOException {
        if (this.firstRead) {
            // Lazily open the first stream on the first call.
            this.firstRead = false;
            this.currentReader = moveToNextStream();
        }
        for (; this.currentReader != null; this.currentReader = moveToNextStream()) {
            if (!this.complexText) {
                final String line = ((BufferedReader) this.currentReader).readLine();
                if (line != null) {
                    return StringUtils.splitPreserveAllTokens(line, this.delimiterChar);
                }
            } else {
                final String[] fields = parseLine(this.currentReader);
                if (fields != null) {
                    return fields;
                }
            }
            // Current stream is exhausted; the loop update opens the next one.
        }
        return null;
    }

    /**
     * Opens a reader over the next input stream.
     *
     * <p>When the split covers the whole file, the stream is wrapped in a
     * {@link BufferedReader} (through a {@link GZIPInputStream} first when gzip
     * input is enabled). Otherwise complex-text mode is switched on, the stream is
     * advanced to the split start, and a {@code SplitReader} limited to the split
     * size is returned.
     *
     * <p>{@code InputStream.skip} may skip fewer bytes than requested, so the skip
     * is repeated until the split start is reached; if the stream ends first an
     * {@link IOException} is thrown instead of reading from the wrong offset.
     *
     * @return a reader for the next stream, or {@code null} when there are no more streams
     * @throws IOException if the stream cannot be opened or positioned
     */
    private Reader moveToNextStream() throws IOException {
        SourceInputStream next = this.inputs.next();
        if (next == null) {
            return null;
        }
        long splitSize = next.getSplitSize();
        if (next.getFileSize() == splitSize) {
            System.out.println("Processing whole file: " + next.getFileName());
            // NOTE(review): decodes with the platform default charset — confirm intended.
            return new BufferedReader(this.isGzip ? new InputStreamReader(new GZIPInputStream(next)) : new InputStreamReader(next));
        }
        // Partial splits are always parsed character by character.
        this.complexText = true;
        long currentPos = next.getCurrentPos();
        long splitStart = next.getSplitStart();
        if (currentPos < splitStart) {
            System.out.println("Skipping: " + (splitStart - currentPos) + " bytes to split start.");
            long remaining = splitStart - currentPos;
            while (remaining > 0) {
                long skipped = next.skip(remaining);
                if (skipped <= 0) {
                    // skip() made no progress; consume one byte to tell a stall from end of stream.
                    if (next.read() < 0) {
                        throw new IOException("Reached end of " + next.getFileName() + " with " + remaining + " bytes left to skip before split start " + splitStart);
                    }
                    skipped = 1;
                }
                remaining -= skipped;
            }
            currentPos = next.getCurrentPos();
        }
        System.out.println("Processing bytes [" + currentPos + " , " + ((currentPos + splitSize) - 1) + "] for file " + next.getFileName());
        return new SplitReader(new BufferedReader(new InputStreamReader(next)), splitSize);
    }
}
