package com.terark.mobilesearch.docparser.pdf;

import br.a;
import java.io.File;
import org.apache.lucene.analysis.shingle.ShingleFilter;

/* loaded from: classes.dex */
/**
 * Thin JNI wrapper that extracts plain text from a PDF file via the native
 * {@code pdftotext} library and normalizes its whitespace.
 */
public class PdfToText {
    private static final String TAG = "TERARK_PdfToText";

    /**
     * Files whose length is at or above this many bytes are skipped.
     *
     * <p>NOTE(review): this value is 200 MiB, but the log message emitted when the
     * limit is hit says "20M". One of the two is presumably a typo; confirm the
     * intended limit before changing either.
     */
    private static final long MAX_PDF_BYTES = 200L * 1024 * 1024;

    /** Matches any run of one or more whitespace characters (includes CR and LF). */
    private static final String WHITESPACE_RUN = "\\s+";

    /** Matches whitespace at the very start of the text. */
    private static final String LEADING_WHITESPACE = "^\\s+";

    static {
        // The native implementation of getTextOnly lives in this library.
        System.loadLibrary("pdftotext");
    }

    /**
     * Extracts the text of a PDF file and collapses its whitespace.
     *
     * @param str path of the PDF file; must not be {@code null}
     *            ({@code new File(null)} throws {@link NullPointerException})
     * @param i2  passed through unchanged to the native extractor; its meaning is
     *            defined by the native library (not visible here — TODO confirm)
     * @return {@code null} if the file is at or above the size limit and was skipped;
     *         an empty string if the native extractor returned {@code null};
     *         otherwise the extracted text with every whitespace run replaced by the
     *         shingle token separator and leading whitespace removed
     */
    public static String getContent(String str, int i2) {
        // A missing file has length 0 and therefore falls through to the native call.
        if (new File(str).length() >= MAX_PDF_BYTES) {
            a.a(TAG, "PDF 文件尺寸超过 20M, 跳过!");
            return null;
        }
        String textOnly = getTextOnly(str, i2);
        if (textOnly == null) {
            return "";
        }
        // The previous pattern "[\\s+\\r\\n]]" was a character class followed by a
        // literal ']', so it only matched a whitespace/'+' character directly before
        // a ']' and never collapsed ordinary whitespace runs.
        return textOnly
                .replaceAll(WHITESPACE_RUN, ShingleFilter.DEFAULT_TOKEN_SEPARATOR)
                .replaceFirst(LEADING_WHITESPACE, "");
    }

    /**
     * Native text extractor implemented in the {@code pdftotext} library.
     * {@link #getContent} treats a {@code null} result as "no text".
     */
    private static native String getTextOnly(String str, int i2);
}
