package TxtParserPackage.extractors.HTML_DataGrabber;

import TxtParserPackage.AutomaticTextParser;
import java.io.File;

/* loaded from: classes.dex */
/**
 * Downloads an article from the Italian Wikipedia ({@code it.wikipedia.org}),
 * extracts text from it via the inherited {@code extractFromHtml} overload and
 * optionally caches both the raw page and the extracted text under
 * {@link #CACHEDIR}.
 */
public class HtmlWikipediaGrabber extends HTMLDataGrabber {
    /**
     * Directory in which cached {@code .html} and {@code .txt} files are stored.
     * Left public and mutable so existing callers that reassign it keep working.
     */
    public static String CACHEDIR = "./wikipedia";

    /**
     * Creates the grabber and makes sure the cache directory exists.
     *
     * <p>Directory creation is best-effort: a failure is not reported here and
     * will only surface later, when a cache file is written.
     */
    public HtmlWikipediaGrabber() {
        File cacheDir = new File(CACHEDIR);
        if (!cacheDir.exists()) {
            // mkdirs() also creates missing parent directories, so a nested
            // CACHEDIR works as well as the single-level default.
            cacheDir.mkdirs();
        }
    }

    /**
     * Small manual driver: disables debug output and grabs the "Cesare" article
     * without reading the cache, saving both the HTML and the extracted text.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        HTMLDataGrabber.DEBUG = false;
        new HtmlWikipediaGrabber().extractFromHtml("Cesare", false, true, true);
    }

    /**
     * Fetches the Wikipedia page named {@code str} and returns its extracted text.
     *
     * <p>NOTE(review): {@code str} is concatenated as-is into both the URL and
     * the cache file path; it is not encoded or sanitized here. Confirm whether
     * {@code getRemotePage} encodes it and whether callers may pass untrusted
     * titles.
     *
     * @param str article title; appended to {@code http://it.wikipedia.org/wiki/}
     *            and used as the base name of the cache files
     * @param z   when {@code true} and {@code CACHEDIR/str.txt} already exists,
     *            the content of that file is returned and nothing is downloaded
     * @param z2  when {@code true}, the downloaded page is written to
     *            {@code CACHEDIR/str.html}
     * @param z3  when {@code true}, the extracted text is written to
     *            {@code CACHEDIR/str.txt}
     * @return the extracted text, with every non-empty trimmed entry followed by
     *         a newline; the cached file content when the cache is used; or
     *         {@code null} when {@code getRemotePage} returns {@code null}
     */
    public String extractFromHtml(String str, boolean z, boolean z2, boolean z3) {
        if (z) {
            File cachedText = new File(cachePath(str, ".txt"));
            if (cachedText.exists()) {
                return AutomaticTextParser.fillFromFileToString(cachedText);
            }
        }

        String remotePage = getRemotePage("http://it.wikipedia.org/wiki/" + str);
        if (remotePage == null) {
            return null;
        }
        if (z2) {
            AutomaticTextParser.writeToTextFile(cachePath(str, ".html"), remotePage);
        }

        // Presumably limits extraction to the region between the page heading and
        // the "last modified" footer and pulls out the <p>...</p> elements; the
        // exact semantics live in the inherited overload - TODO confirm.
        String[][] extracted = extractFromHtml(remotePage, "<h1 id=\"firstHeading\"", "<li id=\"footer-info-lastmod\">", "\n", "<p", "</p>", 1);

        // StringBuilder avoids re-copying the accumulated text on every append.
        StringBuilder text = new StringBuilder();
        for (String[] row : extracted) {
            for (String cell : row) {
                String trimmed = cell.trim();
                if (trimmed.length() > 0) {
                    text.append(trimmed).append("\n");
                }
            }
        }
        String result = text.toString();

        if (z3) {
            AutomaticTextParser.writeToTextFile(cachePath(str, ".txt"), result);
        }
        return result;
    }

    /** Builds the path of the cache file for {@code title} with the given extension (including the dot). */
    private static String cachePath(String title, String extension) {
        return CACHEDIR + "/" + title + extension;
    }
}
