package kd.bos.gptas.km;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.jsoup.UncheckedIOException;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.XmlDeclaration;
import org.jsoup.parser.Parser;

/* loaded from: input_file:kd/bos/gptas/km/DataUtil.class */
/**
 * Helpers for parsing an already-decoded markup string into a jsoup {@link Document} and
 * aligning the document's output charset with the charset the markup declares for itself.
 */
public class DataUtil {
    /** Captures the value of a {@code charset=} parameter, tolerating one optional opening quote. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile("(?i)\\bcharset=\\s*(?:[\"'])?([^\\s,;\"']*)");
    public static final Charset UTF_8 = StandardCharsets.UTF_8;
    static final String defaultCharsetName = UTF_8.name();

    /**
     * Parses {@code html} with the given parser and, when the markup declares a supported
     * charset other than UTF-8, sets that charset on the document's output settings.
     *
     * <p>The declared charset is looked up first in {@code <meta http-equiv=content-type>} /
     * {@code <meta charset>} elements and then in a leading XML declaration. If the declared
     * charset is supported but cannot encode, the document charset is set to UTF-8 instead.
     * When no usable non-UTF-8 charset is declared the parsed document is returned untouched.
     *
     * @param html        the markup to parse
     * @param charsetName accepted for signature compatibility; this implementation never reads
     *                    the supplied value (only a charset declared in the markup is applied)
     * @param baseUri     base URI handed to {@link Parser#parseInput(String, String)}
     * @param parser      the parser used to build the document
     * @return the parsed document
     * @throws IOException the cause unwrapped from a jsoup {@link UncheckedIOException}
     */
    public static Document parseHtml(String html, @Nullable String charsetName, String baseUri, Parser parser) throws IOException {
        try {
            Document doc = parser.parseInput(html, baseUri);
            String declared = validateCharset(findDeclaredCharset(doc));
            if (declared != null && !declared.equalsIgnoreCase(defaultCharsetName)) {
                // The input is already a String, so parsing it a second time would produce the
                // same tree; only the output charset needs adjusting.
                Charset charset = Charset.forName(declared);
                doc.outputSettings().charset(charset);
                if (!charset.canEncode()) {
                    // Decode-only charsets cannot be used for output; fall back to UTF-8.
                    doc.charset(UTF_8);
                }
            }
            return doc;
        } catch (UncheckedIOException e) {
            throw e.ioException();
        }
    }

    /**
     * Finds the raw (unvalidated) charset name declared by the document.
     *
     * <p>The first matching {@code <meta>} element that yields a non-null value wins, even if
     * that value is empty. Only when no meta element yields a value is the first child node
     * inspected for an XML declaration, whose {@code encoding} attribute is then returned.
     *
     * @param doc the parsed document to inspect
     * @return the declared charset text, or {@code null} if nothing was declared
     */
    @Nullable
    private static String findDeclaredCharset(Document doc) {
        for (Element meta : doc.select("meta[http-equiv=content-type], meta[charset]")) {
            String found = null;
            if (meta.hasAttr("http-equiv")) {
                found = getCharsetFromContentType(meta.attr("content"));
            }
            if (found == null && meta.hasAttr("charset")) {
                found = meta.attr("charset");
            }
            if (found != null) {
                return found;
            }
        }
        if (doc.childNodeSize() > 0) {
            Node first = doc.childNode(0);
            XmlDeclaration declaration = null;
            if (first instanceof XmlDeclaration) {
                declaration = (XmlDeclaration) first;
            } else if (first instanceof Comment) {
                // A declaration may have been parsed as a comment node; convert it if so.
                Comment comment = (Comment) first;
                if (comment.isXmlDeclaration()) {
                    declaration = comment.asXmlDeclaration();
                }
            }
            if (declaration != null && declaration.name().equalsIgnoreCase("xml")) {
                return declaration.attr("encoding");
            }
        }
        return null;
    }

    /**
     * Normalises a charset name and checks that the JVM supports it.
     *
     * <p>The name is trimmed and stripped of single and double quotes. If that form is not
     * supported, its upper-cased form (using {@link Locale#ENGLISH}) is tried.
     *
     * @param str candidate charset name, may be {@code null}
     * @return the supported name (cleaned, possibly upper-cased), or {@code null} when the
     *         input is {@code null}, empty, syntactically illegal, or unsupported
     */
    @Nullable
    private static String validateCharset(@Nullable String str) {
        if (str == null || str.isEmpty()) {
            return null;
        }
        String cleaned = str.trim().replaceAll("[\"']", "");
        try {
            if (Charset.isSupported(cleaned)) {
                return cleaned;
            }
            String upper = cleaned.toUpperCase(Locale.ENGLISH);
            return Charset.isSupported(upper) ? upper : null;
        } catch (IllegalCharsetNameException e) {
            // An illegal name is treated the same as an unsupported one.
            return null;
        }
    }

    /**
     * Extracts and validates the charset from a Content-Type style value such as
     * {@code text/html; charset=GBK}.
     *
     * @param str the content-type text, may be {@code null}
     * @return the supported charset name, or {@code null} if {@code str} is {@code null}, has
     *         no {@code charset=} parameter, or names an unsupported charset
     */
    @Nullable
    static String getCharsetFromContentType(@Nullable String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(str);
        if (!matcher.find()) {
            return null;
        }
        return validateCharset(matcher.group(1).trim().replace("charset=", ""));
    }
}
