package com.github.sunnysuperman.commons.utils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

/* loaded from: classes.dex */
public class ContentExtractor {
    /**
     * Tag-gap limit with the value 10, the same number {@link #extract(String)} uses when deciding
     * whether a sentence starts a new paragraph.
     */
    private static final int Threshold = 10;

    /**
     * HTML entity reference (including the leading {@code &} and trailing {@code ;}) mapped to its
     * replacement text. Populated once by the static initializer of this class and read by
     * {@link Sentence#normalizeContent()}.
     */
    private static final Map<String, String> cs = new HashMap<String, String>();

    /* loaded from: classes.dex */
    /**
     * An ordered group of {@link Sentence}s that are scored and rendered together.
     */
    public static class Paragraph {
        /** Weight applied to each sentence's character count when scoring. */
        private static final int LENGTH_WEIGHT = 1;
        /** Weight applied to the position gap between consecutive sentences when scoring. */
        private static final int GAP_WEIGHT = 1;

        /** Sentences in insertion order. */
        public List<Sentence> sentences = new ArrayList<Sentence>();

        /**
         * Appends a sentence to the end of this paragraph.
         *
         * @param sentence the sentence to add
         */
        public void addSentence(Sentence sentence) {
            this.sentences.add(sentence);
        }

        /**
         * Concatenates the content of every sentence, each followed by a single space.
         *
         * @return the joined text; empty when the paragraph has no sentences. A non-empty result
         *         always ends with a space.
         */
        public String getContent() {
            StringBuilder joined = new StringBuilder();
            for (Sentence sentence : this.sentences) {
                joined.append(sentence.content).append(" ");
            }
            return joined.toString();
        }

        /**
         * @return the {@code pos} of the first sentence, or -1 when the paragraph is empty
         */
        public int getFirstSentencePos() {
            if (this.sentences.isEmpty()) {
                return -1;
            }
            return this.sentences.get(0).pos;
        }

        /**
         * @return the {@code pos} of the last sentence, or -1 when the paragraph is empty
         */
        public int getLastSentencePos() {
            if (this.sentences.isEmpty()) {
                return -1;
            }
            return this.sentences.get(this.sentences.size() - 1).pos;
        }

        /**
         * Scores the paragraph: the total length of all sentence contents, minus the sum of the
         * position gaps between consecutive sentences.
         *
         * @return the score; 0 for an empty paragraph
         */
        public int getScore() {
            int score = 0;
            // The first sentence is compared with itself, so it contributes no gap penalty.
            int previousPos = getFirstSentencePos();
            for (Sentence sentence : this.sentences) {
                score += sentence.content.length() * LENGTH_WEIGHT;
                score -= (sentence.pos - previousPos) * GAP_WEIGHT;
                previousPos = sentence.pos;
            }
            return score;
        }
    }

    /* loaded from: classes.dex */
    /**
     * A run of text found between two tags, together with its position.
     */
    public static class Sentence {
        /**
         * Maximum number of name characters accepted between {@code &} (and an optional
         * {@code #}) and the closing {@code ;} for the span to be treated as an entity reference.
         */
        private static final int MAX_ENTITY_NAME_LENGTH = 32;

        /** Text of the sentence. */
        public String content;
        /** Paragraph reference; not assigned anywhere in this class. */
        public Paragraph para;
        /** Position value assigned by the creator of the sentence. */
        public int pos;

        /**
         * Replaces HTML entity references in {@link #content} using the entity table of
         * {@link ContentExtractor}.
         *
         * <p>The text is processed in a single left-to-right pass:
         * <ul>
         * <li>a reference found in the table is replaced by its mapped text;</li>
         * <li>an entity-shaped reference ({@code &name;}, {@code &#digits;}, {@code &#xhex;}) that
         * is not in the table is removed;</li>
         * <li>any other {@code &} is kept as literal text.</li>
         * </ul>
         *
         * <p>Replacement text is never scanned again, so {@code &amp;lt;} becomes {@code &lt;}
         * rather than {@code <}, and an ampersand followed much later by an unrelated semicolon no
         * longer causes the text in between to be deleted.
         *
         * <p>Does nothing when {@code StringUtil.isEmpty(content)} is true.
         */
        public void normalizeContent() {
            if (StringUtil.isEmpty(this.content)) {
                return;
            }
            String text = this.content;
            int amp = text.indexOf('&');
            if (amp < 0) {
                return;
            }
            StringBuilder out = new StringBuilder(text.length());
            int cursor = 0;
            while (amp >= 0) {
                out.append(text, cursor, amp);
                int semi = entityEnd(text, amp);
                if (semi < 0) {
                    // Not an entity reference: keep the ampersand and resume right after it.
                    out.append('&');
                    cursor = amp + 1;
                } else {
                    String replacement = ContentExtractor.cs.get(text.substring(amp, semi + 1));
                    if (replacement != null) {
                        out.append(replacement);
                    }
                    // Unknown entity-shaped references are dropped.
                    cursor = semi + 1;
                }
                amp = text.indexOf('&', cursor);
            }
            out.append(text, cursor, text.length());
            this.content = out.toString();
        }

        /**
         * Returns the index of the {@code ;} that closes an entity-shaped reference starting at
         * {@code amp}, or -1 when the characters after the ampersand do not form one.
         */
        private static int entityEnd(String text, int amp) {
            int k = amp + 1;
            if (k < text.length() && text.charAt(k) == '#') {
                k++;
            }
            int nameStart = k;
            int limit = Math.min(text.length(), nameStart + MAX_ENTITY_NAME_LENGTH);
            while (k < limit && isAsciiAlphanumeric(text.charAt(k))) {
                k++;
            }
            boolean terminated = k > nameStart && k < text.length() && text.charAt(k) == ';';
            return terminated ? k : -1;
        }

        /** Tells whether {@code c} is one of 0-9, a-z or A-Z. */
        private static boolean isAsciiAlphanumeric(char c) {
            return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }
    }

    /*
     * Fills the entity table. Every non-ASCII replacement is written as a Unicode escape sequence
     * so the table does not depend on the encoding used to compile this file. Code points follow
     * the HTML 4.01 character entity reference list.
     *
     * The entries mapped to a plain ASCII space or hyphen (&shy;, &rlm;, &emsp;, &ensp;, &nbsp;,
     * &thinsp;) are deliberate simplifications rather than the exact code points.
     * NOTE(review): &rlm; maps to a space while &lrm; maps to its real code point; confirm which
     * of the two treatments is wanted for directional marks.
     */
    static {
        // Spacing, dashes and invisible formatting characters.
        cs.put("&shy;", "-");
        cs.put("&ndash;", "\u2013");
        cs.put("&mdash;", "\u2014");
        cs.put("&lrm;", "\u200E");
        cs.put("&rlm;", " ");
        cs.put("&zwj;", "\u200D");
        cs.put("&zwnj;", "\u200C");
        cs.put("&emsp;", " ");
        cs.put("&ensp;", " ");
        cs.put("&nbsp;", " ");
        cs.put("&thinsp;", " ");
        cs.put("&quot;", "\"");
        cs.put("&amp;", "&");

        // Brackets, diacritics and punctuation.
        cs.put("&lang;", "\u2329");
        cs.put("&lceil;", "\u2308");
        cs.put("&lfloor;", "\u230A");
        cs.put("&piv;", "\u03D6");
        cs.put("&rang;", "\u232A");
        cs.put("&rceil;", "\u2309");
        cs.put("&rfloor;", "\u230B");
        cs.put("&thetasym;", "\u03D1");
        cs.put("&upsih;", "\u03D2");
        cs.put("&circ;", "\u02C6");
        cs.put("&iexcl;", "\u00A1");
        cs.put("&brvbar;", "\u00A6");
        cs.put("&uml;", "\u00A8");
        cs.put("&macr;", "\u00AF");
        cs.put("&acute;", "\u00B4");
        cs.put("&cedil;", "\u00B8");
        cs.put("&iquest;", "\u00BF");
        cs.put("&tilde;", "\u02DC");
        cs.put("&lsquo;", "\u2018");
        cs.put("&rsquo;", "\u2019");
        cs.put("&sbquo;", "\u201A");
        cs.put("&ldquo;", "\u201C");
        cs.put("&rdquo;", "\u201D");
        cs.put("&bdquo;", "\u201E");
        cs.put("&prime;", "\u2032");
        cs.put("&Prime;", "\u2033");
        cs.put("&lsaquo;", "\u2039");
        cs.put("&rsaquo;", "\u203A");
        cs.put("&oline;", "\u203E");

        // Mathematical operators and relations.
        cs.put("&oplus;", "\u2295");
        cs.put("&minus;", "\u2212");
        cs.put("&otimes;", "\u2297");
        cs.put("&frasl;", "\u2044");
        cs.put("&lowast;", "\u2217");
        cs.put("&lt;", "<");
        cs.put("&gt;", ">");
        cs.put("&plusmn;", "\u00B1");
        cs.put("&laquo;", "\u00AB");
        cs.put("&raquo;", "\u00BB");
        cs.put("&times;", "\u00D7");
        cs.put("&divide;", "\u00F7");
        cs.put("&forall;", "\u2200");
        cs.put("&part;", "\u2202");
        cs.put("&exist;", "\u2203");
        cs.put("&empty;", "\u2205");
        cs.put("&nabla;", "\u2207");
        cs.put("&isin;", "\u2208");
        cs.put("&notin;", "\u2209");
        cs.put("&ni;", "\u220B");
        cs.put("&prod;", "\u220F");
        cs.put("&sum;", "\u2211");
        cs.put("&radic;", "\u221A");
        cs.put("&cong;", "\u2245");
        cs.put("&prop;", "\u221D");
        cs.put("&ang;", "\u2220");
        cs.put("&and;", "\u2227");
        cs.put("&or;", "\u2228");
        cs.put("&cap;", "\u2229");
        cs.put("&cup;", "\u222A");
        cs.put("&int;", "\u222B");
        cs.put("&there4;", "\u2234");
        cs.put("&sim;", "\u223C");
        cs.put("&asymp;", "\u2248");
        cs.put("&ne;", "\u2260");
        cs.put("&equiv;", "\u2261");
        cs.put("&le;", "\u2264");
        cs.put("&ge;", "\u2265");
        cs.put("&sub;", "\u2282");
        cs.put("&sup;", "\u2283");
        cs.put("&nsub;", "\u2284");
        cs.put("&sube;", "\u2286");
        cs.put("&supe;", "\u2287");
        cs.put("&perp;", "\u22A5");
        cs.put("&sdot;", "\u22C5");
        cs.put("&loz;", "\u25CA");

        // Arrows.
        cs.put("&uarr;", "\u2191");
        cs.put("&uArr;", "\u21D1");
        cs.put("&rarr;", "\u2192");
        cs.put("&lArr;", "\u21D0");
        cs.put("&rArr;", "\u21D2");
        cs.put("&darr;", "\u2193");
        cs.put("&dArr;", "\u21D3");
        cs.put("&larr;", "\u2190");
        cs.put("&crarr;", "\u21B5");
        cs.put("&harr;", "\u2194");
        cs.put("&hArr;", "\u21D4");

        // Currency, legal marks and miscellaneous symbols.
        cs.put("&cent;", "\u00A2");
        cs.put("&pound;", "\u00A3");
        cs.put("&curren;", "\u00A4");
        cs.put("&yen;", "\u00A5");
        cs.put("&sect;", "\u00A7");
        cs.put("&copy;", "\u00A9");
        cs.put("&not;", "\u00AC");
        cs.put("&reg;", "\u00AE");
        cs.put("&deg;", "\u00B0");
        cs.put("&micro;", "\u00B5");
        cs.put("&para;", "\u00B6");
        cs.put("&middot;", "\u00B7");
        cs.put("&dagger;", "\u2020");
        cs.put("&Dagger;", "\u2021");
        cs.put("&bull;", "\u2022");
        cs.put("&hellip;", "\u2026");
        cs.put("&permil;", "\u2030");
        cs.put("&spades;", "\u2660");
        cs.put("&clubs;", "\u2663");
        cs.put("&hearts;", "\u2665");
        cs.put("&diams;", "\u2666");
        cs.put("&euro;", "\u20AC");
        cs.put("&frac14;", "\u00BC");
        cs.put("&frac12;", "\u00BD");
        cs.put("&frac34;", "\u00BE");
        cs.put("&sup1;", "\u00B9");
        cs.put("&sup2;", "\u00B2");
        cs.put("&sup3;", "\u00B3");
        cs.put("&infin;", "\u221E");

        // Latin letters with diacritics, ligatures and letter-like symbols.
        cs.put("&ordf;", "\u00AA");
        cs.put("&aacute;", "\u00E1");
        cs.put("&Aacute;", "\u00C1");
        cs.put("&Agrave;", "\u00C0");
        cs.put("&agrave;", "\u00E0");
        cs.put("&acirc;", "\u00E2");
        cs.put("&Acirc;", "\u00C2");
        cs.put("&Auml;", "\u00C4");
        cs.put("&auml;", "\u00E4");
        cs.put("&Atilde;", "\u00C3");
        cs.put("&atilde;", "\u00E3");
        cs.put("&Aring;", "\u00C5");
        cs.put("&aring;", "\u00E5");
        cs.put("&aelig;", "\u00E6");
        cs.put("&AElig;", "\u00C6");
        cs.put("&ccedil;", "\u00E7");
        cs.put("&Ccedil;", "\u00C7");
        cs.put("&eth;", "\u00F0");
        cs.put("&ETH;", "\u00D0");
        cs.put("&Eacute;", "\u00C9");
        cs.put("&eacute;", "\u00E9");
        cs.put("&Egrave;", "\u00C8");
        cs.put("&egrave;", "\u00E8");
        cs.put("&Ecirc;", "\u00CA");
        cs.put("&ecirc;", "\u00EA");
        cs.put("&euml;", "\u00EB");
        cs.put("&Euml;", "\u00CB");
        cs.put("&fnof;", "\u0192");
        cs.put("&image;", "\u2111");
        cs.put("&Iacute;", "\u00CD");
        cs.put("&iacute;", "\u00ED");
        cs.put("&igrave;", "\u00EC");
        cs.put("&Igrave;", "\u00CC");
        cs.put("&Icirc;", "\u00CE");
        cs.put("&icirc;", "\u00EE");
        cs.put("&Iuml;", "\u00CF");
        cs.put("&iuml;", "\u00EF");
        cs.put("&ntilde;", "\u00F1");
        cs.put("&Ntilde;", "\u00D1");
        cs.put("&ordm;", "\u00BA");
        cs.put("&Oacute;", "\u00D3");
        cs.put("&oacute;", "\u00F3");
        cs.put("&Ograve;", "\u00D2");
        cs.put("&ograve;", "\u00F2");
        cs.put("&Ocirc;", "\u00D4");
        cs.put("&ocirc;", "\u00F4");
        cs.put("&ouml;", "\u00F6");
        cs.put("&Ouml;", "\u00D6");
        cs.put("&otilde;", "\u00F5");
        cs.put("&Otilde;", "\u00D5");
        cs.put("&Oslash;", "\u00D8");
        cs.put("&oslash;", "\u00F8");
        cs.put("&oelig;", "\u0153");
        cs.put("&OElig;", "\u0152");
        cs.put("&weierp;", "\u2118");
        cs.put("&real;", "\u211C");
        cs.put("&Scaron;", "\u0160");
        cs.put("&scaron;", "\u0161");
        cs.put("&szlig;", "\u00DF");
        cs.put("&THORN;", "\u00DE");
        cs.put("&thorn;", "\u00FE");
        cs.put("&trade;", "\u2122");
        cs.put("&uacute;", "\u00FA");
        cs.put("&Uacute;", "\u00DA");
        cs.put("&Ugrave;", "\u00D9");
        cs.put("&ugrave;", "\u00F9");
        cs.put("&ucirc;", "\u00FB");
        cs.put("&Ucirc;", "\u00DB");
        cs.put("&Uuml;", "\u00DC");
        cs.put("&uuml;", "\u00FC");
        cs.put("&Yacute;", "\u00DD");
        cs.put("&yacute;", "\u00FD");
        cs.put("&Yuml;", "\u0178");

        // Greek letters.
        cs.put("&alpha;", "\u03B1");
        cs.put("&Alpha;", "\u0391");
        cs.put("&beta;", "\u03B2");
        cs.put("&Beta;", "\u0392");
        cs.put("&gamma;", "\u03B3");
        cs.put("&Gamma;", "\u0393");
        cs.put("&delta;", "\u03B4");
        cs.put("&Delta;", "\u0394");
        cs.put("&Epsilon;", "\u0395");
        cs.put("&epsilon;", "\u03B5");
        cs.put("&zeta;", "\u03B6");
        cs.put("&Zeta;", "\u0396");
        cs.put("&eta;", "\u03B7");
        cs.put("&Eta;", "\u0397");
        cs.put("&theta;", "\u03B8");
        cs.put("&Theta;", "\u0398");
        cs.put("&iota;", "\u03B9");
        cs.put("&Iota;", "\u0399");
        cs.put("&Kappa;", "\u039A");
        cs.put("&kappa;", "\u03BA");
        cs.put("&lambda;", "\u03BB");
        cs.put("&Lambda;", "\u039B");
        cs.put("&mu;", "\u03BC");
        cs.put("&Mu;", "\u039C");
        cs.put("&nu;", "\u03BD");
        cs.put("&Nu;", "\u039D");
        cs.put("&xi;", "\u03BE");
        cs.put("&Xi;", "\u039E");
        cs.put("&Omicron;", "\u039F");
        cs.put("&omicron;", "\u03BF");
        cs.put("&pi;", "\u03C0");
        cs.put("&Pi;", "\u03A0");
        cs.put("&rho;", "\u03C1");
        cs.put("&Rho;", "\u03A1");
        cs.put("&Sigma;", "\u03A3");
        cs.put("&sigma;", "\u03C3");
        cs.put("&sigmaf;", "\u03C2");
        cs.put("&Tau;", "\u03A4");
        cs.put("&tau;", "\u03C4");
        cs.put("&Upsilon;", "\u03A5");
        cs.put("&upsilon;", "\u03C5");
        cs.put("&Phi;", "\u03A6");
        cs.put("&phi;", "\u03C6");
        cs.put("&Chi;", "\u03A7");
        cs.put("&chi;", "\u03C7");
        cs.put("&Psi;", "\u03A8");
        cs.put("&psi;", "\u03C8");
        cs.put("&Omega;", "\u03A9");
        cs.put("&omega;", "\u03C9");
        cs.put("&alefsym;", "\u2135");
    }

    /**
     * Extracts the highest-scoring paragraph of text from a piece of markup.
     *
     * <p>The input is scanned character by character. Everything between {@code <} and
     * {@code >} is skipped, and spaces, tabs, carriage returns and line feeds outside tags are
     * discarded. Each run of remaining text becomes a {@link Sentence} whose {@code pos} is the
     * number of {@code >} characters seen so far. A sentence joins the most recent
     * {@link Paragraph} unless it lies more than {@code Threshold} positions after that
     * paragraph's last sentence, in which case it starts a new paragraph. The paragraph with the
     * highest {@link Paragraph#getScore()} wins; on a tie the earliest one is kept.
     *
     * <p>Text that follows the last tag is treated as a final sentence when at least one
     * paragraph already exists, so it is no longer silently dropped.
     *
     * @param str the markup to scan; may be {@code null}
     * @return {@code null} when {@code str} is {@code null}; the content of the best paragraph
     *         (which ends with a space) when any sentence was closed by a {@code <}; otherwise
     *         the accumulated non-whitespace text as is, without entity replacement
     */
    public static String extract(String str) {
        if (str == null) {
            return null;
        }
        List<Paragraph> paragraphs = new ArrayList<Paragraph>();
        StringBuilder pending = new StringBuilder();
        boolean insideTag = false;
        int tagCount = 0;
        for (int idx = 0; idx < str.length(); idx++) {
            char ch = str.charAt(idx);
            if (ch == '<') {
                insideTag = true;
                if (pending.length() > 0 && appendSentence(paragraphs, pending.toString(), tagCount)) {
                    pending.setLength(0);
                }
            } else if (ch == '>') {
                tagCount++;
                insideTag = false;
            }
            if (!insideTag && ch != '>' && ch != '\r' && ch != '\n' && ch != '\t' && ch != ' ') {
                pending.append(ch);
            }
        }
        if (paragraphs.isEmpty()) {
            // No sentence was ever closed by a tag: hand back the raw accumulated text.
            return pending.toString();
        }
        if (pending.length() > 0) {
            // Text after the last tag would otherwise be lost.
            appendSentence(paragraphs, pending.toString(), tagCount);
        }
        Paragraph best = null;
        int bestScore = 0;
        for (Paragraph candidate : paragraphs) {
            int score = candidate.getScore();
            if (best == null || bestScore < score) {
                best = candidate;
                bestScore = score;
            }
        }
        return best.getContent();
    }

    /**
     * Wraps {@code text} in a normalized sentence at {@code pos} and adds it to the most recent
     * paragraph, opening a new paragraph first when the list is empty or the gap is too large.
     *
     * @return {@code false} when {@code StringUtil.isEmpty(text)} is true and nothing was added
     */
    private static boolean appendSentence(List<Paragraph> paragraphs, String text, int pos) {
        if (StringUtil.isEmpty(text)) {
            return false;
        }
        Sentence sentence = new Sentence();
        sentence.pos = pos;
        sentence.content = text;
        sentence.normalizeContent();
        Paragraph target;
        if (paragraphs.isEmpty()) {
            target = new Paragraph();
            paragraphs.add(target);
        } else {
            target = paragraphs.get(paragraphs.size() - 1);
        }
        int lastPos = target.getLastSentencePos();
        if (lastPos >= 0 && pos - lastPos > Threshold) {
            target = new Paragraph();
            paragraphs.add(target);
        }
        target.addSentence(sentence);
        return true;
    }

    /**
     * Extracts text with {@link #extract(String)}, trims it, and limits it to {@code i} chars.
     *
     * <p>When the cut would fall between the two halves of a surrogate pair, the result is one
     * char shorter so that it never ends in a dangling high surrogate.
     *
     * @param str the markup to scan
     * @param i   the maximum number of chars to return
     * @return {@code null} when {@code StringUtil.isEmpty(str)} is true or the extraction yields
     *         {@code null}; otherwise the trimmed text, at most {@code i} chars long
     * @throws StringIndexOutOfBoundsException if {@code i} is negative
     */
    public static String extract(String str, int i) {
        if (StringUtil.isEmpty(str)) {
            return null;
        }
        String extracted = extract(str);
        if (extracted == null) {
            return null;
        }
        String trimmed = extracted.trim();
        if (trimmed.length() <= i) {
            return trimmed;
        }
        int end = i;
        // trimmed.length() > i here, so charAt(end) is always in range.
        if (end > 0
                && Character.isHighSurrogate(trimmed.charAt(end - 1))
                && Character.isLowSurrogate(trimmed.charAt(end))) {
            end--;
        }
        return trimmed.substring(0, end);
    }

    /**
     * Runs {@link #extract(String, int)} and appends {@code "......"} whenever the length of the
     * result differs from the length of {@code str}.
     *
     * <p>NOTE(review): the comparison is against the raw input, so the marker is also added when
     * only tags or whitespace were removed and nothing was cut off; confirm this is intended.
     *
     * @param str the markup to scan
     * @param i   the maximum number of chars taken from the extracted text
     * @return an empty string when the extraction yields {@code null}; otherwise the extracted
     *         text, followed by {@code "......"} if its length differs from that of {@code str}
     */
    public static String extractWithDot(String str, int i) {
        String summary = extract(str, i);
        if (summary == null) {
            return "";
        }
        if (summary.length() == str.length()) {
            return summary;
        }
        return summary + "......";
    }
}
