package org.apache.tika.parser.html;

import com.tencent.open.SocialConstants;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jg0.f;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.r;
import org.apache.tika.sax.v;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* compiled from: HtmlHandler.java */
/* loaded from: classes6.dex */
/**
 * SAX handler that filters incoming HTML events before forwarding them to the
 * wrapped {@code v} handler and records selected information in a
 * {@link Metadata} object.
 *
 * <p>The handler keeps three nesting counters: one that starts at a
 * {@code TITLE} element, one that starts at a {@code BODY} or {@code FRAMESET}
 * element, and one that starts at any element the mapper ({@code c}) reports
 * through {@code c(String)}. Character data and elements are forwarded only
 * while the body counter is positive and the third counter is zero.
 *
 * <p>NOTE(review): identifiers in this file are obfuscated; several constants
 * referenced below ({@code jg0.r.Sd}, {@code t0.c.f101118b},
 * {@code zp.d.f119454e}, {@code bj.d.X}, {@code f.Ac}, {@code f.Bc}) are
 * defined elsewhere and their values are not visible here.
 */
public class b extends r {

    /** Attribute names whose values are resolved against the base location. */
    public static final Set<String> f87968l =
            new HashSet<String>(Arrays.asList("src", "href", "longdesc", "cite"));

    /** Matches two signed decimal numbers separated by commas and/or whitespace. */
    public static final Pattern f87969m = Pattern.compile("\\s*(-?\\d+\\.\\d+)[,\\s]+(-?\\d+\\.\\d+)\\s*");

    /** Mapper consulted for element names, attribute names and skipped elements. */
    public final c f87970d;

    /** Downstream handler that receives the filtered events. */
    public final v f87971e;

    /** Metadata object updated while parsing. */
    public final Metadata f87972f;

    /** Buffer collecting character data seen inside TITLE and outside the body. */
    public final StringBuilder f87973g = new StringBuilder();

    /** Nesting depth counted from the first BODY/FRAMESET element. */
    public int f87974h = 0;

    /** Nesting depth counted from the first element for which the mapper's {@code c(String)} returns true. */
    public int f87975i = 0;

    /** Nesting depth counted from the first TITLE element. */
    public int f87976j = 0;

    /** Set once the title buffer has been written to the metadata. */
    public boolean f87977k = false;

    /**
     * Creates a handler around an existing downstream handler.
     *
     * <p>If the metadata has no {@code "Content-Location"} entry, the value
     * stored under {@code jg0.r.Sd} is trimmed and, when it parses as a URL,
     * recorded as the {@code "Content-Location"}.
     *
     * @param cVar     mapper consulted for element/attribute decisions
     * @param vVar     downstream handler
     * @param metadata metadata to read from and update
     */
    public b(c cVar, v vVar, Metadata metadata) {
        super(vVar);
        this.f87970d = cVar;
        this.f87971e = vVar;
        this.f87972f = metadata;

        if (metadata.get("Content-Location") == null) {
            String fallback = metadata.get(jg0.r.Sd);
            if (fallback != null) {
                String candidate = fallback.trim();
                try {
                    // Constructed only to validate the format.
                    new URL(candidate);
                    metadata.set("Content-Location", candidate);
                } catch (MalformedURLException ignored) {
                    // Not a valid URL: leave "Content-Location" unset.
                }
            }
        }
    }

    /**
     * Creates a handler that wraps the given SAX handler in a new {@code v}.
     *
     * @param cVar           mapper consulted for element/attribute decisions
     * @param contentHandler SAX handler to wrap
     * @param metadata       metadata to read from and update
     */
    public b(c cVar, ContentHandler contentHandler, Metadata metadata) {
        this(cVar, new v(contentHandler, metadata), metadata);
    }

    /**
     * Records one name/value pair taken from a META element.
     *
     * <ul>
     *   <li>{@code ICBM} (case-insensitive): when the value matches
     *       {@link #f87969m} it is stored as {@code "first, second"} under
     *       {@code "ICBM"} and the two numbers are also stored under
     *       {@code f.Ac} and {@code f.Bc}; otherwise the raw value is stored.</li>
     *   <li>{@code Content-Type} (case-insensitive): stored under
     *       {@code TikaCoreProperties.f87776c}, using the parsed media type's
     *       string form when parsing succeeds and the raw value otherwise.</li>
     *   <li>Anything else is added under its own name.</li>
     * </ul>
     *
     * @param str  metadata name; the call is a no-op when null
     * @param str2 metadata value; the call is a no-op when null
     */
    public final void a(String str, String str2) {
        if (str == null || str2 == null) {
            return;
        }

        if (str.equalsIgnoreCase("ICBM")) {
            Matcher coordinates = f87969m.matcher(str2);
            if (coordinates.matches()) {
                this.f87972f.set("ICBM", coordinates.group(1) + ", " + coordinates.group(2));
                this.f87972f.set(f.Ac, coordinates.group(1));
                this.f87972f.set(f.Bc, coordinates.group(2));
            } else {
                this.f87972f.set("ICBM", str2);
            }
        } else if (str.equalsIgnoreCase("Content-Type")) {
            MediaType mediaType = MediaType.parse(str2);
            if (mediaType == null) {
                this.f87972f.set(TikaCoreProperties.f87776c, str2);
            } else {
                this.f87972f.set(TikaCoreProperties.f87776c, mediaType.toString());
            }
        } else {
            this.f87972f.add(str, str2);
        }
    }

    /**
     * Resolves a reference against the current {@code "Content-Location"}.
     *
     * @param str reference to resolve
     * @return the result of {@link #c(String, String)} with the current location as base
     */
    public final String b(String str) {
        String location = this.f87972f.get("Content-Location");
        return c(location, str);
    }

    /**
     * Resolves {@code str2} against the base URL {@code str}.
     *
     * <p>The trimmed reference is returned unchanged when the base is null,
     * when its lower-cased form starts with one of the excluded prefixes
     * ({@code "urn:"}, {@code t0.c.f101118b}, {@code "tel:"},
     * {@code zp.d.f119454e}, {@code "javascript:"}, {@code "about:"}), or when
     * URL construction fails.
     *
     * @param str  base URL, may be null
     * @param str2 reference to resolve
     * @return the resolved external form, or the trimmed reference
     */
    public final String c(String str, String str2) {
        String reference = str2.trim();
        String lowered = reference.toLowerCase(Locale.ENGLISH);

        if (str == null
                || lowered.startsWith("urn:")
                || lowered.startsWith(t0.c.f101118b)
                || lowered.startsWith("tel:")
                || lowered.startsWith(zp.d.f119454e)
                || lowered.startsWith("javascript:")
                || lowered.startsWith("about:")) {
            return reference;
        }

        try {
            URL baseUrl = new URL(str.trim());
            String basePath = baseUrl.getPath();
            // A bare query string is appended to a non-empty base path that
            // does not end in '/', instead of going through relative resolution.
            boolean appendQuery =
                    reference.startsWith("?") && basePath.length() > 0 && !basePath.endsWith("/");
            if (appendQuery) {
                String file = baseUrl.getPath() + reference;
                return new URL(baseUrl.getProtocol(), baseUrl.getHost(), baseUrl.getPort(), file)
                        .toExternalForm();
            }
            return new URL(baseUrl, reference).toExternalForm();
        } catch (MalformedURLException ignored) {
            // Unresolvable: fall back to the trimmed reference.
            return reference;
        }
    }

    /**
     * Appends text to the title buffer while inside TITLE and outside the
     * body, and forwards text to the superclass while inside the body and not
     * inside a skipped element.
     */
    @Override // org.apache.tika.sax.r, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i11, int i12) throws SAXException {
        if (this.f87976j > 0 && this.f87974h == 0) {
            this.f87973g.append(cArr, i11, i12);
        }
        if (this.f87974h > 0 && this.f87975i == 0) {
            super.characters(cArr, i11, i12);
        }
    }

    /**
     * Starts element {@code str} on the downstream handler with a filtered
     * copy of the attributes.
     *
     * <p>Each attribute's local name is passed through the mapper's
     * {@code a(element, attribute)}; attributes mapped to null are removed and
     * the rest are renamed to the mapped name. Values of attributes listed in
     * {@link #f87968l} are resolved against the current location. For
     * {@code object} elements, {@code codebase} is replaced by its resolved
     * value (or the current location when absent) and {@code data} /
     * {@code classid} are resolved against that codebase. When the element
     * name equals {@code SocialConstants.PARAM_IMG_URL} and no {@code alt}
     * attribute remains, an empty one is added.
     *
     * @param str        element name to emit
     * @param attributes original attributes
     * @throws SAXException if the downstream handler fails
     */
    public final void d(String str, Attributes attributes) throws SAXException {
        if (attributes.getLength() == 0) {
            this.f87971e.o(str);
            return;
        }

        boolean isObject = str.equals("object");
        String codebase = null;
        if (isObject) {
            String declared = attributes.getValue("", "codebase");
            if (declared != null) {
                codebase = b(declared);
            } else {
                codebase = this.f87972f.get("Content-Location");
            }
        }

        AttributesImpl filtered = new AttributesImpl(attributes);
        int index = 0;
        while (index < filtered.getLength()) {
            String mappedName = this.f87970d.a(str, filtered.getLocalName(index));
            if (mappedName == null) {
                // After removal the next attribute occupies this index, so do not advance.
                filtered.removeAttribute(index);
                continue;
            }

            filtered.setLocalName(index, mappedName);
            if (f87968l.contains(mappedName)) {
                filtered.setValue(index, b(filtered.getValue(index)));
            } else if (isObject && "codebase".equals(mappedName)) {
                filtered.setValue(index, codebase);
            } else if (isObject && ("data".equals(mappedName) || "classid".equals(mappedName))) {
                filtered.setValue(index, c(codebase, filtered.getValue(index)));
            }
            index++;
        }

        if (SocialConstants.PARAM_IMG_URL.equals(str) && filtered.getValue("", "alt") == null) {
            filtered.addAttribute("", "alt", "alt", "CDATA", "");
        }
        this.f87971e.q(str, filtered);
    }

    /**
     * Closes the mapped element downstream (or emits {@code n()} for names
     * contained in {@code v.f88477q}), stores the title when the outermost
     * TITLE closes for the first time, and decrements every positive counter.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) throws SAXException {
        if (this.f87974h > 0 && this.f87975i == 0) {
            String mappedName = this.f87970d.b(str3);
            if (mappedName != null) {
                this.f87971e.k(mappedName);
            } else if (v.f88477q.contains(str3.toLowerCase(Locale.ENGLISH))) {
                this.f87971e.n();
            }
        }

        if (this.f87976j > 0) {
            this.f87976j--;
            if (this.f87976j == 0 && !this.f87977k) {
                // Only the first completed title is recorded.
                this.f87972f.set(TikaCoreProperties.f87790q, this.f87973g.toString().trim());
                this.f87977k = true;
            }
        }
        if (this.f87974h > 0) {
            this.f87974h--;
        }
        if (this.f87975i > 0) {
            this.f87975i--;
        }
    }

    /**
     * Forwards ignorable whitespace to the superclass only while inside the
     * body and not inside a skipped element.
     */
    @Override // org.apache.tika.sax.r, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void ignorableWhitespace(char[] cArr, int i11, int i12) throws SAXException {
        if (this.f87974h > 0 && this.f87975i == 0) {
            super.ignorableWhitespace(cArr, i11, i12);
        }
    }

    /**
     * Updates the nesting counters, handles META / BASE / LINK elements seen
     * before the body, forwards mapped elements seen inside the body, and
     * finally clears the title buffer.
     *
     * <p>Element names are compared against upper-case literals
     * ({@code "TITLE"}, {@code "BODY"}, ...), so the qualified name
     * {@code str3} is expected to arrive upper-cased.
     */
    @Override // org.apache.tika.sax.r, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        // Each counter starts on its trigger element and then tracks every nested element.
        if (this.f87976j > 0 || "TITLE".equals(str3)) {
            this.f87976j++;
        }
        if (this.f87974h > 0 || "BODY".equals(str3) || "FRAMESET".equals(str3)) {
            this.f87974h++;
        }
        if (this.f87970d.c(str3) || this.f87975i > 0) {
            this.f87975i++;
        }

        if (this.f87974h == 0 && this.f87975i == 0) {
            if ("META".equals(str3) && attributes.getValue("content") != null) {
                String content = attributes.getValue("content");
                String httpEquiv = attributes.getValue("http-equiv");
                if (httpEquiv != null) {
                    a(httpEquiv, content);
                } else {
                    String metaName = attributes.getValue("name");
                    if (metaName != null) {
                        a(metaName, content);
                    } else {
                        String property = attributes.getValue("property");
                        if (property != null) {
                            this.f87972f.add(property, content);
                        }
                    }
                }
            } else if ("BASE".equals(str3) && attributes.getValue("href") != null) {
                // Emit the element first so its attributes are resolved against
                // the previous location, then switch to the new base.
                d(bj.d.X, attributes);
                this.f87971e.k(bj.d.X);
                this.f87972f.set("Content-Location", b(attributes.getValue("href")));
            } else if ("LINK".equals(str3)) {
                d("link", attributes);
                this.f87971e.k("link");
            }
        }

        if (this.f87974h > 0 && this.f87975i == 0) {
            String mappedName = this.f87970d.b(str3);
            if (mappedName != null) {
                d(mappedName, attributes);
            }
        }

        this.f87973g.setLength(0);
    }
}
