package com.waplyj.filesystem;

import com.waplyj.util.LogUtil;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
/**
 * Extracts readable plain text from HTML files.
 *
 * <p>File reading is delegated to {@code read(...)}, which is not defined in this class
 * (presumably inherited from {@link TxtFile} -- confirm against that class).
 */
public class HtmlFile extends TxtFile {
    /**
     * Finds a GB-family charset declaration anywhere in the document, e.g.
     * {@code charset=gb2312} or {@code charset="gbk"}. Used with {@code find()} so that
     * line terminators in the document do not affect detection.
     */
    private static final Pattern GB_CHARSET_DECLARATION =
            Pattern.compile("charset\\s*=\\s*[\"']?(?:gb2312|gbk|gb18030)", Pattern.CASE_INSENSITIVE);

    /** Matches a whole {@code <script>} element, including its body. */
    private static final Pattern SCRIPT_ELEMENT =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?</script>", Pattern.CASE_INSENSITIVE);

    /** Matches a whole {@code <style>} element, including its body. */
    private static final Pattern STYLE_ELEMENT =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?</style>", Pattern.CASE_INSENSITIVE);

    /** Matches any remaining markup tag. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

    /** Matches a run of three or more newlines. */
    private static final Pattern NEWLINE_RUN = Pattern.compile("\n{3,}");

    /** Matches a run of four or more spaces. */
    private static final Pattern SPACE_RUN = Pattern.compile(" {4,}");

    /**
     * Reads the HTML file identified by {@code str} and returns its text with markup removed.
     *
     * <p>The file is first read with the default {@code read(str)}. If the content declares a
     * gb2312/gbk/gb18030 charset, it is read again with {@code read(str, "gbk")} so the text is
     * decoded accordingly.
     *
     * @param str identifier passed to {@code read} (presumably a file path -- confirm)
     * @return the document text after {@link #trimTags(String)} has been applied
     */
    public static String extractText(String str) {
        String content = read(str);
        // Search the untouched content with find(). The previous approach rewrote "\n" to "/n"
        // and back, which turned every literal "/n" in the document (e.g. ".../news") into a
        // line break, and still failed to detect the charset in files using "\r" line endings.
        if (GB_CHARSET_DECLARATION.matcher(content).find()) {
            LogUtil.debug(str + ": charset is gb2312/gbk/gb18030");
            content = read(str, "gbk");
        } else {
            LogUtil.debug(str + ": charset not matches gb2312/gbk/gb18030");
        }
        return trimTags(content);
    }

    /**
     * Removes every case-insensitive match of a regular expression from a string.
     *
     * @param str  the text to clean
     * @param str2 the regular expression whose matches are deleted
     * @return {@code str} with all matches of {@code str2} removed
     */
    public static String trimStringByRegex(String str, String str2) {
        return Pattern.compile(str2, Pattern.CASE_INSENSITIVE).matcher(str).replaceAll("");
    }

    /**
     * Strips HTML markup from a string and normalises the remaining text.
     *
     * <p>Steps, in order: drop {@code <script>} and {@code <style>} elements with their bodies,
     * drop all other tags, trim, delete carriage returns, collapse runs of three or more
     * newlines into one, turn runs of four or more spaces into a tab, and finally decode the
     * entities {@code &nbsp; &lt; &gt; &quot; &amp;}.
     *
     * @param str the HTML text
     * @return the plain-text rendering of {@code str}
     */
    public static String trimTags(String str) {
        String text = SCRIPT_ELEMENT.matcher(str).replaceAll("");
        text = STYLE_ELEMENT.matcher(text).replaceAll("");
        text = ANY_TAG.matcher(text).replaceAll("").trim();

        text = text.replace("\r", "");
        text = NEWLINE_RUN.matcher(text).replaceAll("\n");
        text = SPACE_RUN.matcher(text).replaceAll("\t");

        // Entities are decoded only after tag removal, so decoded '<' and '>' are never
        // mistaken for markup. "&amp;" goes last so "&amp;lt;" yields "&lt;", not "<".
        return text.replace("&nbsp;", " ")
                .replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&amp;", "&");
    }
}
