/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.encoding;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.detect.TextStatistics;
import org.apache.tika.utils.CharsetUtils;
import org.jsoup.nodes.Element;
import org.jsoup.select.Evaluator;
import org.jsoup.select.QueryParser;
import org.jsoup.select.Selector;
import org.rypt.f8.Utf8Statistics;

class EncodingUtils {
    private static final int[] windows1252Illegals = new int[]{129, 141, 143, 144, 157};
    private static final int[] windows1250Illegals = new int[]{129, 131, 136, 144, 152};
    private static final int[] iso_8859_7Illegals = new int[]{174, 210, 255};
    private static final int[] windows1253Illegals = new int[]{129, 136, 138, 140, 141, 142, 143, 144, 152, 154, 156, 157, 158, 159, 170, 210, 255};
    private static final int[] windows1255Illegals = new int[]{129, 138, 140, 141, 142, 143, 144, 154, 156, 157, 158, 159, 202, 217, 218, 219, 220, 221, 222, 223, 251, 252, 255};
    private static final int[] iso_8859_8Illegals = new int[]{161, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 251, 252, 255};
    private static final int[] windows1254Illegals = new int[]{129, 141, 142, 143, 144, 157, 158};
    private static final int[] windows1251Illegals = new int[]{152};
    private static final int[] iso_8859_6Illegals = new int[]{161, 162, 163, 165, 166, 167, 168, 169, 170, 171, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 192, 219, 220, 221, 222, 223, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255};
    private static final Evaluator charsetMetas = QueryParser.parse((String)"meta[http-equiv=content-type], meta[charset]");
    private static final Pattern contentTypeCharsetPattern = Pattern.compile("(?i)\\bcharset\\s*=[\\s\"']*([^\\s,;\"']+)");
    private static final Pattern xmlEncoding = Pattern.compile("(?is)\\A\\s*<\\?\\s*xml\\s+[^<>]*encoding\\s*=\\s*(?:['\"]\\s*)?([-_:.a-z0-9]+)");

    EncodingUtils() {
    }

    static String iso_8859_1(InputStream is) throws IOException {
        int n;
        StringBuilder chars = new StringBuilder(Math.max(is.available(), 8192));
        byte[] buffer = new byte[8192];
        while ((n = is.read(buffer)) != -1) {
            for (int i = 0; i < n; ++i) {
                chars.append((char)(buffer[i] & 0xFF));
            }
        }
        return chars.toString();
    }

    static Charset correctVariant(TextStatistics stats, Charset charset) {
        if (charset == null) {
            return null;
        }
        switch (charset.name()) {
            case "ISO-8859-1": {
                if ((stats.count(13) != 0 || EncodingUtils.hasC1Control(stats)) && EncodingUtils.hasNoneOf(stats, windows1252Illegals)) {
                    try {
                        return EncodingUtils.forName("windows-1252");
                    }
                    catch (Exception exception) {
                        // empty catch block
                    }
                }
                return EncodingUtils.iso_8859_1_or_15(stats);
            }
            case "windows-1252": {
                return EncodingUtils.hasNoneOf(stats, windows1252Illegals) ? charset : EncodingUtils.iso_8859_1_or_15(stats);
            }
            case "ISO-8859-2": {
                if (EncodingUtils.hasC1Control(stats) && EncodingUtils.hasNoneOf(stats, windows1250Illegals)) {
                    try {
                        return EncodingUtils.forName("windows-1250");
                    }
                    catch (Exception exception) {
                        // empty catch block
                    }
                }
                return charset;
            }
            case "windows-1250": {
                return EncodingUtils.hasNoneOf(stats, windows1250Illegals) ? charset : EncodingUtils.charset("ISO-8859-2");
            }
            case "ISO-8859-7": {
                if (EncodingUtils.hasC1Control(stats) && EncodingUtils.hasNoneOf(stats, windows1253Illegals)) {
                    try {
                        return EncodingUtils.forName("windows-1253");
                    }
                    catch (Exception exception) {
                        // empty catch block
                    }
                }
                return EncodingUtils.hasNoneOf(stats, iso_8859_7Illegals) ? charset : null;
            }
            case "windows-1253": {
                return EncodingUtils.hasNoneOf(stats, windows1253Illegals) ? charset : (EncodingUtils.hasNoneOf(stats, iso_8859_7Illegals) ? EncodingUtils.charset("ISO-8859-7") : null);
            }
            case "ISO-8859-8": 
            case "ISO-8859-8-I": {
                if (EncodingUtils.hasC1Control(stats) && EncodingUtils.hasNoneOf(stats, windows1255Illegals)) {
                    try {
                        return EncodingUtils.forName("windows-1255");
                    }
                    catch (Exception exception) {
                        // empty catch block
                    }
                }
                return EncodingUtils.hasNoneOf(stats, iso_8859_8Illegals) ? charset : null;
            }
            case "windows-1255": {
                return EncodingUtils.hasNoneOf(stats, windows1255Illegals) ? charset : (EncodingUtils.hasNoneOf(stats, iso_8859_8Illegals) ? EncodingUtils.charset("ISO-8859-8") : null);
            }
            case "ISO-8859-9": {
                if (EncodingUtils.hasC1Control(stats) && EncodingUtils.hasNoneOf(stats, windows1254Illegals)) {
                    try {
                        return EncodingUtils.forName("windows-1254");
                    }
                    catch (Exception exception) {
                        // empty catch block
                    }
                }
                return charset;
            }
            case "windows-1254": {
                return EncodingUtils.hasNoneOf(stats, windows1254Illegals) ? charset : EncodingUtils.charset("ISO-8859-9");
            }
            case "windows-1251": {
                return EncodingUtils.hasNoneOf(stats, windows1251Illegals) ? charset : null;
            }
            case "ISO-8859-6": {
                return EncodingUtils.hasNoneOf(stats, iso_8859_6Illegals) ? charset : null;
            }
        }
        return charset;
    }

    private static Charset iso_8859_1_or_15(TextStatistics stats) {
        if (stats.count(164) != 0) {
            try {
                return EncodingUtils.forName("ISO-8859-15");
            }
            catch (Exception exception) {
                // empty catch block
            }
        }
        return StandardCharsets.ISO_8859_1;
    }

    private static boolean hasNoneOf(TextStatistics stats, int[] illegals) {
        for (int i : illegals) {
            if (stats.count(i) == 0) continue;
            return false;
        }
        return true;
    }

    private static boolean hasC1Control(TextStatistics ts) {
        for (int i = 128; i < 160; ++i) {
            if (ts.count(i) == 0) continue;
            return true;
        }
        return false;
    }

    static TextStatistics stats(InputStream stream) throws IOException {
        int n;
        TextStatisticsOptimizedForUtf8 stats = new TextStatisticsOptimizedForUtf8();
        byte[] buffer = new byte[8192];
        while ((n = stream.read(buffer)) != -1) {
            stats.addData(buffer, 0, n);
        }
        return stats;
    }

    static Charset forName(String charset) throws Exception {
        try {
            return CharsetUtils.forName((String)charset);
        }
        catch (Exception e) {
            charset = charset.replaceAll("(?i)-I\\b", "");
            try {
                return CharsetUtils.forName((String)charset);
            }
            catch (Exception ignored) {
                throw e;
            }
        }
    }

    private static Charset charset(String charset) {
        try {
            return EncodingUtils.forName(charset);
        }
        catch (Exception e) {
            return null;
        }
    }

    static Charset htmlCharset(TextStatistics stats, Element root) {
        for (Element meta : Selector.select((Evaluator)charsetMetas, (Element)root)) {
            Charset foundCharset = EncodingUtils.correctVariant(stats, EncodingUtils.charset(meta.attr("charset")));
            if (foundCharset != null) {
                return foundCharset;
            }
            foundCharset = EncodingUtils.correctVariant(stats, EncodingUtils.contentTypeCharset(meta.attr("content")));
            if (foundCharset == null) continue;
            return foundCharset;
        }
        return null;
    }

    static Charset contentTypeCharset(CharSequence contentType) {
        if (contentType == null) {
            return null;
        }
        Matcher m = contentTypeCharsetPattern.matcher(contentType);
        if (m.find()) {
            try {
                return EncodingUtils.forName(m.group(1));
            }
            catch (Exception e) {
                return null;
            }
        }
        return null;
    }

    static Charset xmlCharset(TextStatistics stats, CharSequence str) {
        Matcher matcher = xmlEncoding.matcher(str);
        if (matcher.find()) {
            return EncodingUtils.correctVariant(stats, EncodingUtils.charset(matcher.group(1)));
        }
        return null;
    }

    private static class TextStatisticsOptimizedForUtf8
    extends TextStatistics {
        private final Utf8Statistics utf8Stats = new Utf8Statistics();

        private TextStatisticsOptimizedForUtf8() {
        }

        public void addData(byte[] buffer, int offset, int length) {
            super.addData(buffer, offset, length);
            this.utf8Stats.write(buffer, offset, length);
        }

        public boolean looksLikeUTF8() {
            return this.utf8Stats.looksLikeUtf8();
        }
    }
}

