package org.apache.tika.parser.ocr;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: classes.dex */
public class TesseractOCRParser extends AbstractParser {
    public static final TesseractOCRConfig b2 = new TesseractOCRConfig();
    public static final Set<MediaType> c2 = Collections.unmodifiableSet(new HashSet(Arrays.asList(MediaType.h("png"), MediaType.h("jpeg"), MediaType.h("tiff"), MediaType.h("x-ms-bmp"), MediaType.h("gif"))));
    public static Map<String, Boolean> d2 = new HashMap();
    public static Parser e2 = new CompositeImageParser();

    /* loaded from: classes.dex */
    public static class CompositeImageParser extends CompositeParser {
        public static List<Parser> e2 = Arrays.asList(new ImageParser(), new JpegParser(), new TiffParser());

        public CompositeImageParser() {
            super(new MediaTypeRegistry(), e2, null);
        }
    }

    public static String c() {
        return System.getProperty("os.name").startsWith("Windows") ? "tesseract.exe" : "tesseract";
    }

    public final void a(File file, File file2, TesseractOCRConfig tesseractOCRConfig) {
        String str;
        final Process start;
        Thread thread;
        ProcessBuilder processBuilder = new ProcessBuilder(tesseractOCRConfig.b2 + c(), file.getPath(), file2.getPath(), "-l", tesseractOCRConfig.d2, "-psm", tesseractOCRConfig.e2);
        Map<String, String> environment = processBuilder.environment();
        try {
            if (tesseractOCRConfig.c2.isEmpty()) {
                if (!tesseractOCRConfig.b2.isEmpty()) {
                    str = tesseractOCRConfig.b2;
                }
                start = processBuilder.start();
                start.getOutputStream().close();
                final InputStream inputStream = start.getInputStream();
                final InputStream errorStream = start.getErrorStream();
                new Thread(this) { // from class: org.apache.tika.parser.ocr.TesseractOCRParser.2
                    @Override // java.lang.Thread, java.lang.Runnable
                    public void run() {
                        InputStreamReader inputStreamReader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
                        StringBuilder sb = new StringBuilder();
                        char[] cArr = new char[1024];
                        while (true) {
                            try {
                                int read = inputStreamReader.read(cArr);
                                if (read == -1) {
                                    break;
                                } else {
                                    sb.append(cArr, 0, read);
                                }
                            } catch (IOException unused) {
                            } catch (Throwable th) {
                                IOUtils.closeQuietly(inputStream);
                                throw th;
                            }
                        }
                        IOUtils.closeQuietly(inputStream);
                        LogFactory.getLog(TesseractOCRParser.class).debug(sb.toString());
                    }
                }.start();
                new Thread(this) { // from class: org.apache.tika.parser.ocr.TesseractOCRParser.2
                    @Override // java.lang.Thread, java.lang.Runnable
                    public void run() {
                        InputStreamReader inputStreamReader = new InputStreamReader(errorStream, StandardCharsets.UTF_8);
                        StringBuilder sb = new StringBuilder();
                        char[] cArr = new char[1024];
                        while (true) {
                            try {
                                int read = inputStreamReader.read(cArr);
                                if (read == -1) {
                                    break;
                                } else {
                                    sb.append(cArr, 0, read);
                                }
                            } catch (IOException unused) {
                            } catch (Throwable th) {
                                IOUtils.closeQuietly(errorStream);
                                throw th;
                            }
                        }
                        IOUtils.closeQuietly(errorStream);
                        LogFactory.getLog(TesseractOCRParser.class).debug(sb.toString());
                    }
                }.start();
                FutureTask futureTask = new FutureTask(new Callable<Integer>(this) { // from class: org.apache.tika.parser.ocr.TesseractOCRParser.1
                    @Override // java.util.concurrent.Callable
                    public Integer call() {
                        return Integer.valueOf(start.waitFor());
                    }
                });
                thread = new Thread(futureTask);
                thread.start();
                futureTask.get(tesseractOCRConfig.h2, TimeUnit.SECONDS);
                return;
            }
            str = tesseractOCRConfig.c2;
            futureTask.get(tesseractOCRConfig.h2, TimeUnit.SECONDS);
            return;
        } catch (InterruptedException e) {
            thread.interrupt();
            start.destroy();
            Thread.currentThread().interrupt();
            throw new TikaException("TesseractOCRParser interrupted", e);
        } catch (ExecutionException unused) {
            return;
        } catch (TimeoutException e3) {
            thread.interrupt();
            start.destroy();
            throw new TikaException("TesseractOCRParser timeout", e3);
        }
        environment.put("TESSDATA_PREFIX", str);
        start = processBuilder.start();
        start.getOutputStream().close();
        final InputStream inputStream2 = start.getInputStream();
        final InputStream errorStream2 = start.getErrorStream();
        new Thread(this) { // from class: org.apache.tika.parser.ocr.TesseractOCRParser.2
            @Override // java.lang.Thread, java.lang.Runnable
            public void run() {
                InputStreamReader inputStreamReader = new InputStreamReader(inputStream2, StandardCharsets.UTF_8);
                StringBuilder sb = new StringBuilder();
                char[] cArr = new char[1024];
                while (true) {
                    try {
                        int read = inputStreamReader.read(cArr);
                        if (read == -1) {
                            break;
                        } else {
                            sb.append(cArr, 0, read);
                        }
                    } catch (IOException unused2) {
                    } catch (Throwable th) {
                        IOUtils.closeQuietly(inputStream2);
                        throw th;
                    }
                }
                IOUtils.closeQuietly(inputStream2);
                LogFactory.getLog(TesseractOCRParser.class).debug(sb.toString());
            }
        }.start();
        new Thread(this) { // from class: org.apache.tika.parser.ocr.TesseractOCRParser.2
            @Override // java.lang.Thread, java.lang.Runnable
            public void run() {
                InputStreamReader inputStreamReader = new InputStreamReader(errorStream2, StandardCharsets.UTF_8);
                StringBuilder sb = new StringBuilder();
                char[] cArr = new char[1024];
                while (true) {
                    try {
                        int read = inputStreamReader.read(cArr);
                        if (read == -1) {
                            break;
                        } else {
                            sb.append(cArr, 0, read);
                        }
                    } catch (IOException unused2) {
                    } catch (Throwable th) {
                        IOUtils.closeQuietly(errorStream2);
                        throw th;
                    }
                }
                IOUtils.closeQuietly(errorStream2);
                LogFactory.getLog(TesseractOCRParser.class).debug(sb.toString());
            }
        }.start();
        FutureTask futureTask2 = new FutureTask(new Callable<Integer>(this) { // from class: org.apache.tika.parser.ocr.TesseractOCRParser.1
            @Override // java.util.concurrent.Callable
            public Integer call() {
                return Integer.valueOf(start.waitFor());
            }
        });
        thread = new Thread(futureTask2);
        thread.start();
    }

    public final void b(InputStream inputStream, XHTMLContentHandler xHTMLContentHandler) {
        xHTMLContentHandler.startDocument();
        xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "div", "div", XHTMLContentHandler.q2);
        InputStreamReader inputStreamReader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
        try {
            char[] cArr = new char[1024];
            while (true) {
                int read = inputStreamReader.read(cArr);
                if (read == -1) {
                    inputStreamReader.close();
                    xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "div", "div");
                    xHTMLContentHandler.endDocument();
                    return;
                } else if (read > 0) {
                    xHTMLContentHandler.characters(cArr, 0, read);
                }
            }
        } catch (Throwable th) {
            try {
                throw th;
            } catch (Throwable th2) {
                try {
                    inputStreamReader.close();
                } catch (Throwable th3) {
                    th.addSuppressed(th3);
                }
                throw th2;
            }
        }
    }

    public final boolean d(TesseractOCRConfig tesseractOCRConfig) {
        String str = tesseractOCRConfig.b2 + c();
        if (((HashMap) d2).containsKey(str)) {
            return ((Boolean) ((HashMap) d2).get(str)).booleanValue();
        }
        boolean a = ExternalParser.a(new String[]{str}, new int[0]);
        ((HashMap) d2).put(str, Boolean.valueOf(a));
        return a;
    }

    @Override // org.apache.tika.parser.Parser
    public void o(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
        File file;
        TesseractOCRConfig tesseractOCRConfig = (TesseractOCRConfig) parseContext.a(TesseractOCRConfig.class, b2);
        if (!d(tesseractOCRConfig)) {
            return;
        }
        XHTMLContentHandler xHTMLContentHandler = new XHTMLContentHandler(contentHandler, metadata);
        TemporaryResources temporaryResources = new TemporaryResources();
        File file2 = null;
        try {
            TikaInputStream j = TikaInputStream.j(inputStream, temporaryResources);
            File p = j.p();
            long q = j.q();
            if (q >= tesseractOCRConfig.f2 && q <= tesseractOCRConfig.g2) {
                File file3 = temporaryResources.a().toFile();
                a(p, file3, tesseractOCRConfig);
                file = new File(file3.getAbsolutePath() + ".txt");
                try {
                    if (file.exists()) {
                        b(new FileInputStream(file), xHTMLContentHandler);
                    }
                    file2 = file;
                } catch (Throwable th) {
                    th = th;
                    temporaryResources.c();
                    if (file != null) {
                        file.delete();
                    }
                    throw th;
                }
            }
            ((CompositeParser) e2).o(j, contentHandler, metadata, parseContext);
            temporaryResources.c();
            if (file2 != null) {
                file2.delete();
            }
        } catch (Throwable th2) {
            th = th2;
            file = file2;
        }
    }

    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> r(ParseContext parseContext) {
        return d((TesseractOCRConfig) parseContext.a(TesseractOCRConfig.class, b2)) ? c2 : Collections.emptySet();
    }
}
