package com.samsung.informationextraction.extractor;

import com.sec.spp.push.Config;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import net.htmlparser.jericho.HTMLElementName;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.DomSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.PrettyXmlSerializer;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.TagTransformation;
import org.htmlcleaner.audit.HtmlModificationListenerLogger;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: classes2.dex */
/**
 * Static helpers for turning HTML into a W3C DOM {@link Document} via HtmlCleaner, for persisting
 * documents, and for masking text before it is written to the log.
 *
 * <p>The shared {@link #sCleaner} instance is created lazily and without synchronization.
 */
public class ExtractorUtils {
    /** Minimum length a candidate must have to be treated as an HTML entity by {@link #isSymbol}. */
    private static final int HTML_SYMBOL_CODE_LENGTH = 4;
    /** Largest index at which the entity terminator may appear for {@link #isSymbol} to accept it. */
    private static final int HTML_SYMBOL_MARKER = 5;
    /** Charset used to decode markup that is fetched from a URL. */
    private static final String URL_CONTENT_CHARSET = "euc-kr";

    private static final Pattern TAG_PATTERN = Pattern.compile("<(.*?)>");
    private static final Pattern TEXT_PATTERN = Pattern.compile(">(.*?)<");
    private static final Pattern BR_TAG_PATTERN = Pattern.compile("<\\s*[bB][rR][^>]*?>");
    private static final Pattern DIGIT_PATTERN = Pattern.compile("\\d");
    private static final Pattern WORD_PATTERN = Pattern.compile("\\w");
    private static final Pattern ALPHA_PATTERN = Pattern.compile("\\p{Alpha}");

    static HtmlCleaner sCleaner;

    /**
     * Returns the shared cleaner, creating and configuring it on first use.
     *
     * <p>The instance is fully configured before it is stored in {@link #sCleaner}, so the field
     * never refers to a half-configured cleaner. No locking is done: concurrent first calls may
     * each build their own instance.
     */
    private static HtmlCleaner getCleaner() {
        HtmlCleaner cleaner = sCleaner;
        if (cleaner == null) {
            cleaner = new HtmlCleaner();
            CleanerProperties properties = cleaner.getProperties();
            properties.setOmitDoctypeDeclaration(true);
            properties.setOmitComments(true);
            properties.setNamespacesAware(false);
            properties.addHtmlModificationListener(new HtmlModificationListenerLogger(Extractor.sLogger));
            properties.getCleanerTransformations().addTransformation(new TagTransformation(HTMLElementName.HTML, HTMLElementName.HTML, false));
            sCleaner = cleaner;
        }
        return cleaner;
    }

    /**
     * Reports a handled failure through the extractor logger, keeping the stack trace.
     * NOTE(review): assumes Extractor.sLogger is a java.util.logging.Logger, as its use as the
     * HtmlModificationListenerLogger argument suggests - confirm.
     */
    private static void logFailure(String message, Throwable cause) {
        Extractor.sLogger.log(java.util.logging.Level.SEVERE, message, cause);
    }

    /**
     * Serializes {@code tagNode} to pretty-printed XML and parses that XML into a DOM document.
     *
     * @param tagNode cleaned HTML tree to convert
     * @return the parsed document, or {@code null} if serialization or parsing failed (the failure
     *         is logged)
     */
    static Document createDocumentFromXmlString(TagNode tagNode) {
        try {
            // Go through getCleaner() so this also works before any HTML has been cleaned.
            String xml = new PrettyXmlSerializer(getCleaner().getProperties()).getAsString(tagNode);
            return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(new StringReader(xml)));
        } catch (IOException e) {
            logFailure("Failed to read the serialized markup", e);
        } catch (ParserConfigurationException e) {
            logFailure("Failed to create an XML parser", e);
        } catch (SAXException e) {
            logFailure("Failed to parse the serialized markup", e);
        }
        return null;
    }

    /**
     * Cleans the given HTML text and converts it to a DOM document.
     *
     * @param str  HTML markup; must be non-null and non-empty
     * @param str2 path of a file to which the cleaned markup is also written, or {@code null} to
     *             skip writing
     * @return the DOM built from the cleaned markup
     * @throws IllegalArgumentException if {@code str} is null or empty
     */
    public static Document getDocumentFromHtml(String str, String str2) throws ParserConfigurationException, IllegalArgumentException, IOException {
        return getDocumentFromHtml(str, null, str2);
    }

    /**
     * Cleans HTML taken from {@code str}, or from {@code url} when {@code str} is null or empty,
     * and converts it to a DOM document.
     *
     * @param str  HTML markup, preferred when non-empty
     * @param url  location to read markup from when {@code str} is null or empty
     * @param str2 optional path of a file that receives the pretty-printed cleaned markup; a
     *             failure to write it is logged and does not fail the call
     * @return the DOM built from the cleaned markup
     * @throws IllegalArgumentException if neither markup nor a URL was supplied
     * @throws IOException              if reading from {@code url} fails
     */
    private static Document getDocumentFromHtml(String str, URL url, String str2) throws ParserConfigurationException, IllegalArgumentException, IOException {
        HtmlCleaner cleaner = getCleaner();
        TagNode root;
        if (str != null && !str.isEmpty()) {
            root = cleaner.clean(str);
        } else if (url != null) {
            root = cleanFromUrl(cleaner, url);
        } else {
            throw new IllegalArgumentException("Illegal argument exception.");
        }
        root.setDocType(null);
        Document dom = new DomSerializer(cleaner.getProperties(), true).createDOM(root);
        String namespaceUri = dom.getDocumentElement().getNamespaceURI();
        if (namespaceUri != null) {
            Extractor.sLogger.severe("xmlns: " + namespaceUri);
        }
        if (str2 != null) {
            try {
                new PrettyXmlSerializer(cleaner.getProperties()).writeToFile(root, str2);
            } catch (IOException e) {
                // Writing the copy is best effort; the DOM is still returned.
                logFailure("Failed to write cleaned markup to " + str2, e);
            }
        }
        return dom;
    }

    /**
     * Reads markup from {@code url}, decodes it with {@link #URL_CONTENT_CHARSET} and cleans it.
     * The stream is always closed, whether or not cleaning succeeds.
     */
    private static TagNode cleanFromUrl(HtmlCleaner cleaner, URL url) throws IOException {
        // URLConnection.getInputStream() is all that is needed, so no protocol-specific cast.
        BufferedInputStream input = new BufferedInputStream(url.openConnection().getInputStream());
        try {
            return cleaner.clean(input, URL_CONTENT_CHARSET);
        } finally {
            input.close();
        }
    }

    /**
     * Reads HTML from {@code url}, cleans it and converts it to a DOM document.
     *
     * @param url location of the markup; must not be null
     * @param str path of a file to which the cleaned markup is also written, or {@code null} to
     *            skip writing
     * @return the DOM built from the cleaned markup
     * @throws IllegalArgumentException if {@code url} is null
     * @throws IOException              if reading from {@code url} fails
     */
    public static Document getDocumentFromHtml(URL url, String str) throws ParserConfigurationException, IllegalArgumentException, IOException {
        return getDocumentFromHtml(null, url, str);
    }

    /**
     * Masks the characters of {@code str} one by one while copying recognised HTML entities
     * (see {@link #isSymbol}) through unchanged.
     *
     * @param str text to mask; must not be null
     * @return the masked text
     */
    private static String getMaskedValueForFail(String str) {
        StringBuilder masked = new StringBuilder(str.length());
        int i = 0;
        while (i < str.length()) {
            char current = str.charAt(i);
            if (current == '&') {
                String rest = str.substring(i);
                if (isSymbol(rest)) {
                    int terminator = rest.indexOf(Config.KEYVALUE_SPLIT);
                    masked.append(rest.substring(0, terminator + 1));
                    // Resume after the entity. Restarting the loop keeps the index in bounds when
                    // the entity ends the string and lets a directly following entity be
                    // recognised as well.
                    i += terminator + 1;
                    continue;
                }
            }
            masked.append(maskCharacter(current));
            i++;
        }
        return masked.toString();
    }

    /**
     * Masks a single character: digits become "0", lower-case letters "a", upper-case letters "A",
     * and anything else that the {@code \p{Alpha}} class matches becomes "H". A character that its
     * branch's regular expression does not match is returned unchanged.
     */
    private static String maskCharacter(char c) {
        String text = String.valueOf(c);
        if (Character.isDigit(c)) {
            return DIGIT_PATTERN.matcher(text).replaceAll("0");
        }
        if (Character.isLowerCase(c)) {
            return WORD_PATTERN.matcher(text).replaceAll("a");
        }
        if (Character.isUpperCase(c)) {
            return WORD_PATTERN.matcher(text).replaceAll("A");
        }
        return ALPHA_PATTERN.matcher(text).replaceAll("H");
    }

    /**
     * Tells whether {@code str}, which the caller passes starting at an ampersand, begins with
     * something short enough to be treated as an HTML entity.
     * NOTE(review): Config.KEYVALUE_SPLIT is presumably ";" (the entity terminator) - confirm.
     */
    private static boolean isSymbol(String str) {
        int terminator = str.indexOf(Config.KEYVALUE_SPLIT);
        return str.length() >= HTML_SYMBOL_CODE_LENGTH && terminator > 0 && terminator <= HTML_SYMBOL_MARKER;
    }

    /**
     * Produces a masked copy of {@code str} and logs it at info level.
     *
     * <p>When the input contains at least one {@code <...>} tag and one {@code >...<} text run,
     * each tag is emitted verbatim followed by the masked text run paired with it; pairing stops
     * as soon as either tags or text runs are exhausted, so a tag with no text run after it is not
     * emitted. Otherwise the whole input is masked. A null input is treated as empty.
     *
     * @param str text, possibly containing markup, to mask
     * @return the masked text; if masking fails part-way, whatever was produced up to that point
     */
    public static String maskOriginalText(String str) {
        String input = str == null ? "" : str;
        StringBuilder masked = new StringBuilder();
        Matcher tagMatcher = TAG_PATTERN.matcher(input);
        Matcher textMatcher = TEXT_PATTERN.matcher(input);
        try {
            if (tagMatcher.find() && textMatcher.find()) {
                // do/while: the find() calls above already sit on the first pair, so it has to be
                // emitted before advancing, otherwise the first tag and its text are lost.
                do {
                    masked.append(tagMatcher.group(0)).append(getMaskedValueForFail(textMatcher.group(1)));
                } while (tagMatcher.find() && textMatcher.find());
            } else {
                masked.append(getMaskedValueForFail(input));
            }
        } catch (Exception e) {
            logFailure("Failed to mask text", e);
        }
        String result = masked.toString();
        Extractor.sLogger.info(result);
        return result;
    }

    /** Replaces every tag whose name starts with "br" (any letter case) with a newline. */
    private static String removeBrTag(String str) {
        return BR_TAG_PATTERN.matcher(str).replaceAll("\n");
    }

    /**
     * Writes {@code document} to the file at path {@code str} using a default transformer.
     * Failures are logged and not propagated.
     *
     * @param document document to write
     * @param str      destination file path
     */
    static void saveDocumentToFile(Document document, String str) {
        try {
            TransformerFactory.newInstance().newTransformer().transform(new DOMSource(document), new StreamResult(new File(str)));
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            logFailure("Failed to save document to " + str, e);
        }
    }
}
