package org.apache.tika.parser.ocr;

import bj.d;
import gg0.e;
import gg0.l;
import gg0.m;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.ImageObserver;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.imageio.ImageIO;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.opc.ContentTypes;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.sax.v;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: classes6.dex */
/**
 * Parser that hands image files to the external Tesseract command-line program and
 * streams the text Tesseract writes back into the supplied SAX {@link ContentHandler}.
 *
 * <p>The parser only advertises its media types when the Tesseract executable can be
 * launched with the active {@link TesseractOCRConfig}; otherwise it reports no supported
 * types and {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)} is a no-op.
 *
 * <p>Member names such as {@code f88204a} or {@code a(...)} are decompiler-assigned and are
 * kept unchanged because other compiled classes may reference them.
 *
 * <p>NOTE(review): the obfuscated helpers are used as follows (inferred from usage only —
 * confirm against their definitions): {@code gg0.l} manages temporary files ({@code b()}
 * yields a temp {@link File}, {@code c()} is the cleanup call), {@code gg0.m} is an
 * {@link InputStream} wrapper exposing {@code getFile()} and a size accessor {@code S()},
 * {@code gg0.e} supplies a charset constant and a stream-closing helper, and
 * {@code org.apache.tika.sax.v} is the content-handler wrapper the OCR text is written to.
 */
public class TesseractOCRParser extends AbstractParser {

    /** Configuration used when the {@link ParseContext} does not carry a {@link TesseractOCRConfig}. */
    public static final TesseractOCRConfig f88204a = new TesseractOCRConfig();

    /** Image media types reported by {@link #getSupportedTypes(ParseContext)} when Tesseract is available. */
    public static final Set<MediaType> f88205b = Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(MediaType.image("png"), MediaType.image(ContentTypes.EXTENSION_JPG_2), MediaType.image("tiff"), MediaType.image("x-ms-bmp"), MediaType.image(ContentTypes.EXTENSION_GIF))));

    /**
     * Cache of executable-availability checks, keyed by the full executable path.
     *
     * <p>NOTE(review): this is an unsynchronized {@link HashMap} shared by every parser
     * instance — confirm whether the parser is ever used from several threads at once.
     */
    public static Map<String, Boolean> f88206c = new HashMap<String, Boolean>();

    /** Image parser that every input is additionally forwarded to after the OCR step. */
    public static Parser f88207d = new CompositeImageParser();
    private static final long serialVersionUID = -8167538283213097265L;

    /**
     * Composite of the plain image, JPEG and TIFF parsers, built on a fresh
     * {@link MediaTypeRegistry}.
     */
    public static class CompositeImageParser extends CompositeParser {

        /** Delegate parsers handed to the {@link CompositeParser} constructor. */
        public static List<Parser> f88208a = Arrays.<Parser>asList(new ImageParser(), new JpegParser(), new TiffParser());
        private static final long serialVersionUID = -2398203346206381382L;

        public CompositeImageParser() {
            super(new MediaTypeRegistry(), f88208a);
        }
    }

    /**
     * Task that blocks until the wrapped {@link Process} terminates and yields its exit code.
     * Run through a {@link FutureTask} so the wait can be bounded by a timeout.
     */
    public class a implements Callable<Integer> {

        /** Process whose termination is awaited. */
        public final /* synthetic */ Process f88209a;

        public a(Process process) {
            this.f88209a = process;
        }

        /**
         * Waits for the process to finish.
         *
         * @return the process exit value
         * @throws Exception if the waiting thread is interrupted
         */
        @Override // java.util.concurrent.Callable
        public Integer call() throws Exception {
            return Integer.valueOf(this.f88209a.waitFor());
        }
    }

    /**
     * Thread that drains one output stream of the external process and writes whatever it
     * collected to the debug log.
     */
    public class b extends Thread {

        /** Stream being drained; closed by {@link #run()} when draining stops. */
        public final /* synthetic */ InputStream f88211a;

        public b(InputStream inputStream) {
            this.f88211a = inputStream;
        }

        /**
         * Reads the stream until end-of-stream or the first I/O failure, closes it, and logs
         * the collected text at debug level.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            InputStreamReader reader = new InputStreamReader(this.f88211a, e.f52967a);
            StringBuilder captured = new StringBuilder();
            char[] buffer = new char[1024];
            try {
                int count;
                while ((count = reader.read(buffer)) != -1) {
                    captured.append(buffer, 0, count);
                }
            } catch (IOException ignored) {
                // Best effort: a read failure ends the drain (retrying a broken stream would
                // spin forever); whatever was captured so far is still logged below.
            } finally {
                e.a(this.f88211a);
            }
            LogFactory.getLog(TesseractOCRParser.class).debug(captured.toString());
        }
    }

    /**
     * Returns the file name of the Tesseract executable for the current operating system.
     *
     * @return {@code "tesseract.exe"} when the {@code os.name} system property starts with
     *         {@code "Windows"}, otherwise {@code "tesseract"}
     */
    public static String getTesseractProg() {
        return System.getProperty("os.name").startsWith("Windows") ? "tesseract.exe" : "tesseract";
    }

    /**
     * Runs Tesseract on {@code file}, passing {@code file2} as the output argument, and waits
     * for it to finish within the configured timeout (in seconds).
     *
     * @param file               image file passed to Tesseract as input
     * @param file2              output path passed to Tesseract
     * @param tesseractOCRConfig supplies executable path, language, page segmentation mode and timeout
     * @throws IOException   if the process cannot be started or its stdin cannot be closed
     * @throws TikaException if the wait is interrupted or the timeout elapses; the process is
     *                       destroyed in both cases
     */
    public final void a(File file, File file2, TesseractOCRConfig tesseractOCRConfig) throws IOException, TikaException {
        ProcessBuilder builder = new ProcessBuilder(tesseractOCRConfig.getTesseractPath() + getTesseractProg(), file.getPath(), file2.getPath(), "-l", tesseractOCRConfig.getLanguage(), "-psm", tesseractOCRConfig.getPageSegMode());
        e(tesseractOCRConfig, builder);
        Process process = builder.start();
        // Nothing is ever written to the child's stdin.
        process.getOutputStream().close();
        InputStream stdout = process.getInputStream();
        InputStream stderr = process.getErrorStream();
        // Drain both pipes on background threads so the child cannot block on a full pipe.
        d("OCR MSG", stdout, file);
        d("OCR ERROR", stderr, file);
        FutureTask<Integer> exitCode = new FutureTask<Integer>(new a(process));
        Thread waiter = new Thread(exitCode);
        waiter.start();
        try {
            exitCode.get(tesseractOCRConfig.getTimeout(), TimeUnit.SECONDS);
        } catch (InterruptedException interrupted) {
            waiter.interrupt();
            process.destroy();
            // Preserve the interrupt status for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new TikaException("TesseractOCRParser interrupted", interrupted);
        } catch (ExecutionException ignored) {
            // Deliberately ignored: the task only calls Process.waitFor(), and a failure there
            // leaves nothing to recover; the caller simply finds no OCR output.
        } catch (TimeoutException timedOut) {
            waiter.interrupt();
            process.destroy();
            throw new TikaException("TesseractOCRParser timeout", timedOut);
        }
    }

    /**
     * Copies the text read from {@code inputStream} into {@code vVar}, wrapped in a single
     * element identified by {@code d.f10151q}, and closes the stream on every path.
     *
     * @param inputStream text produced by Tesseract; always closed by this method
     * @param vVar        handler receiving the document events
     * @throws SAXException if the handler rejects an event
     * @throws IOException  if reading or closing the stream fails
     */
    public final void b(InputStream inputStream, v vVar) throws SAXException, IOException {
        InputStreamReader reader = new InputStreamReader(inputStream, e.f52967a);
        try {
            // The opening events are inside the try so the stream is released even when the
            // handler fails before any text has been read.
            vVar.startDocument();
            vVar.o(d.f10151q);
            char[] buffer = new char[1024];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                if (count > 0) {
                    vVar.characters(buffer, 0, count);
                }
            }
        } finally {
            reader.close();
        }
        vVar.k(d.f10151q);
        vVar.endDocument();
    }

    /**
     * Reports whether the Tesseract executable for the given configuration can be run,
     * remembering the answer per executable path in {@link #f88206c}.
     *
     * @param tesseractOCRConfig supplies the directory prefix of the executable
     * @return {@code true} if {@link ExternalParser#check(String[], int...)} succeeded for the
     *         executable; {@code false} if it failed or raised {@link NoClassDefFoundError}
     */
    public final boolean c(TesseractOCRConfig tesseractOCRConfig) {
        String executable = tesseractOCRConfig.getTesseractPath() + getTesseractProg();
        Boolean cached = f88206c.get(executable);
        if (cached != null) {
            return cached.booleanValue();
        }
        boolean available;
        try {
            available = ExternalParser.check(new String[]{executable}, new int[0]);
        } catch (NoClassDefFoundError ignored) {
            // Treated as "not available" rather than propagated, and cached like any other result.
            available = false;
        }
        f88206c.put(executable, Boolean.valueOf(available));
        return available;
    }

    /**
     * Starts a background thread that drains {@code inputStream} into the debug log.
     *
     * @param str         label for the stream; currently unused
     * @param inputStream process output stream to drain
     * @param file        file being processed; currently unused
     */
    public final void d(String str, InputStream inputStream, File file) {
        new b(inputStream).start();
    }

    /**
     * Exports the configured Tesseract path to the child process as {@code TESSDATA_PREFIX};
     * does nothing when the configured path is empty.
     *
     * @param tesseractOCRConfig supplies the Tesseract path
     * @param processBuilder     builder whose environment is updated
     */
    public final void e(TesseractOCRConfig tesseractOCRConfig, ProcessBuilder processBuilder) {
        String tesseractPath = tesseractOCRConfig.getTesseractPath();
        if (!tesseractPath.isEmpty()) {
            processBuilder.environment().put("TESSDATA_PREFIX", tesseractPath);
        }
    }

    /**
     * Returns the supported image types, or an empty set when the Tesseract executable is not
     * available for the configuration found in {@code parseContext}.
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        TesseractOCRConfig config = (TesseractOCRConfig) parseContext.get(TesseractOCRConfig.class, f88204a);
        if (c(config)) {
            return f88205b;
        }
        return Collections.emptySet();
    }

    /**
     * Renders {@code image} to a temporary PNG file and parses that file with
     * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}.
     *
     * @param image          image to OCR; its width and height must already be known
     * @param contentHandler receives the extracted text
     * @param metadata       metadata passed through to the stream-based parse
     * @param parseContext   parse context passed through to the stream-based parse
     * @throws IOException   if the temporary PNG cannot be written or read
     * @throws SAXException  if the content handler fails
     * @throws TikaException if OCR fails or temporary resources cannot be released
     */
    public void parse(Image image, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        l tempResources = new l();
        try {
            BufferedImage rendered = new BufferedImage(image.getWidth((ImageObserver) null), image.getHeight((ImageObserver) null), BufferedImage.TYPE_INT_RGB);
            // Paint the source pixels; a freshly created BufferedImage is blank, so without
            // this step Tesseract would be given an empty picture.
            Graphics2D graphics = rendered.createGraphics();
            try {
                graphics.drawImage(image, 0, 0, (ImageObserver) null);
            } finally {
                graphics.dispose();
            }

            File pngFile = tempResources.b();
            FileOutputStream pngOut = new FileOutputStream(pngFile);
            try {
                ImageIO.write(rendered, "png", pngOut);
            } finally {
                // Close before the file is read back so its contents are complete on disk.
                pngOut.close();
            }

            m pngStream = m.t(pngFile);
            try {
                parse(pngStream, contentHandler, metadata, parseContext);
            } finally {
                if (pngStream != null) {
                    pngStream.close();
                }
            }
        } finally {
            tempResources.c();
        }
    }

    /**
     * Runs OCR on the image in {@code inputStream} and emits the recognized text to
     * {@code contentHandler}; the stream is then also forwarded to {@link #f88207d}.
     *
     * <p>Returns immediately without reading the stream when the Tesseract executable is not
     * available. OCR is skipped (but the stream is still forwarded) when the input size is
     * outside the configured minimum/maximum.
     *
     * @throws IOException   if the input or the OCR output cannot be read
     * @throws SAXException  if the content handler fails
     * @throws TikaException if OCR is interrupted or times out, or the delegate parser fails
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        TesseractOCRConfig config = (TesseractOCRConfig) parseContext.get(TesseractOCRConfig.class, f88204a);
        if (!c(config)) {
            return;
        }
        v xhtml = new v(contentHandler, metadata);
        l tempResources = new l();
        File ocrText = null;
        try {
            m tikaStream = m.z(inputStream, tempResources);
            File imageFile = tikaStream.getFile();
            long size = tikaStream.S();
            if (size >= config.getMinFileSizeToOcr() && size <= config.getMaxFileSizeToOcr()) {
                File outputBase = tempResources.b();
                // The text is read from "<output>.txt". Track that file before launching the
                // process so it is removed even if the OCR call fails or times out.
                ocrText = new File(outputBase.getAbsolutePath() + ".txt");
                a(imageFile, outputBase, config);
                if (ocrText.exists()) {
                    b(new FileInputStream(ocrText), xhtml);
                }
            }
            // Always hand the image to the regular image parsers as well.
            f88207d.parse(tikaStream, contentHandler, metadata, parseContext);
        } finally {
            try {
                tempResources.c();
            } finally {
                if (ocrText != null) {
                    ocrText.delete();
                }
            }
        }
    }
}
