package sec.bdc.tm.hte.eu.ngram.preprocessing;

import com.google.common.collect.Lists;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import sec.bdc.tm.hte.eu.ngram.preprocessing.constants.GeneralConstants;
import sec.bdc.tm.hte.eu.ngram.preprocessing.constants.StringSubstitution;

/* loaded from: classes49.dex */
/**
 * Text preprocessor that normalizes a raw string and strips URLs, e-mail addresses and
 * non-word content using the patterns declared in {@link GeneralConstants}.
 *
 * <p>Two entry points are offered: {@link #preprocess(String)} yields a single cleaned string,
 * while {@link #preprocessBreaking(String)} additionally splits the text into segments wherever
 * a breaking pattern (breaking whitespace, URL, e-mail) is found.
 *
 * <p>Decompiler note: the static helpers and the {@code flatMap}/{@code map} methods were
 * package-private in the original bytecode; they are kept public here so existing callers of
 * the decompiled class keep working.
 */
public class GeneralPreprocessor implements SourcePreprocessor {
    // Punctuation-normalization patterns, compiled once instead of on every call.
    private static final Pattern ELLIPSIS_CHAR_RUN = Pattern.compile("…+");
    private static final Pattern DOT_RUN = Pattern.compile("\\.\\.+");
    private static final Pattern EXCLAMATION_RUN = Pattern.compile("!!+");
    private static final Pattern QUESTION_RUN = Pattern.compile("\\?\\?+");
    private static final Pattern MIXED_RUN_EXCLAMATION_FIRST = Pattern.compile("[!?]*!\\?[!?]*");
    private static final Pattern MIXED_RUN_QUESTION_FIRST = Pattern.compile("[!?]*\\?![!?]*");

    /**
     * Appends the trimmed content of {@code stringBuffer} to {@code list}, unless the trimmed
     * content is empty.
     *
     * @param list         destination list, modified in place
     * @param stringBuffer buffer whose current content is considered
     */
    public static void addIfNonempty(List<String> list, StringBuffer stringBuffer) {
        String trimmed = stringBuffer.toString().trim();
        if (!trimmed.isEmpty()) {
            list.add(trimmed);
        }
    }

    /** Applies Unicode normalization using {@link GeneralConstants#NORMALIZATION_FORM}. */
    private static String normalize(String str) {
        return Normalizer.normalize(str, GeneralConstants.NORMALIZATION_FORM);
    }

    /**
     * Collapses repeated or mixed terminal punctuation into a canonical form.
     *
     * <p>The steps run in this order, each on the output of the previous one:
     * <ol>
     *   <li>one or more ellipsis characters become {@code "..."};</li>
     *   <li>two or more consecutive dots become {@code "..."};</li>
     *   <li>two or more {@code '!'} become a single {@code '!'};</li>
     *   <li>two or more {@code '?'} become a single {@code '?'};</li>
     *   <li>any run of {@code '!'}/{@code '?'} containing {@code "!?"} or {@code "?!"}
     *       becomes {@code "?!"}.</li>
     * </ol>
     */
    private static String normalizePunctuation(String str) {
        String result = ELLIPSIS_CHAR_RUN.matcher(str).replaceAll("...");
        result = DOT_RUN.matcher(result).replaceAll("...");
        result = EXCLAMATION_RUN.matcher(result).replaceAll("!");
        result = QUESTION_RUN.matcher(result).replaceAll("?");
        result = MIXED_RUN_EXCLAMATION_FIRST.matcher(result).replaceAll("?!");
        return MIXED_RUN_QUESTION_FIRST.matcher(result).replaceAll("?!");
    }

    /** Decodes HTML 4 character entities via {@link StringEscapeUtils#unescapeHtml4(String)}. */
    private static String parseHtmlEncoding(String str) {
        return StringEscapeUtils.unescapeHtml4(str);
    }

    /**
     * Runs the cleaning steps shared by both entry points: Unicode normalization, HTML entity
     * decoding, punctuation normalization, replacement of non-breaking whitespace matches with
     * {@link StringUtils#SPACE}, and finally the literal substitutions from
     * {@link GeneralConstants#SUBSTITUTIONS}.
     */
    private static String preprocessNonBreaking(String str) {
        String normalized = normalize(str);
        String unescaped = parseHtmlEncoding(normalized);
        String punctuated = normalizePunctuation(unescaped);
        String respaced = replacePattern(
                punctuated, GeneralConstants.NON_BREAKING_WHITESPACE_PATTERN, StringUtils.SPACE);
        return replaceStrings(respaced, GeneralConstants.SUBSTITUTIONS);
    }

    /**
     * Deletes every match of {@code pattern} from {@code str} and trims the result.
     *
     * @param str     input text
     * @param pattern pattern whose matches are removed
     * @return the trimmed text without any match of {@code pattern}
     */
    public static String removePattern(String str, Pattern pattern) {
        return replacePattern(str, pattern, "");
    }

    /**
     * Applies {@link #removePattern(String, Pattern)} to every element of {@code list}.
     * Elements that end up empty are kept; the result has the same size as the input.
     */
    private static List<String> removePattern(List<String> list, Pattern pattern) {
        return list.stream()
                .map(item -> removePattern(item, pattern))
                .collect(Collectors.toList());
    }

    /**
     * Splits every element of {@code list} at each match of {@code pattern}, dropping the
     * matched text itself. Each resulting piece is trimmed and pieces that are empty after
     * trimming are discarded.
     */
    private static List<String> removePatternBreaking(List<String> list, Pattern pattern) {
        List<String> pieces = new ArrayList<>();
        for (String item : list) {
            Matcher matcher = pattern.matcher(item);
            while (matcher.find()) {
                // A fresh buffer per match receives only the text between the previous match
                // and this one; the empty replacement drops the match itself.
                StringBuffer beforeMatch = new StringBuffer();
                matcher.appendReplacement(beforeMatch, "");
                addIfNonempty(pieces, beforeMatch);
            }
            // Whatever follows the last match (or the whole item when nothing matched).
            StringBuffer remainder = new StringBuffer();
            matcher.appendTail(remainder);
            addIfNonempty(pieces, remainder);
        }
        return pieces;
    }

    /**
     * Replaces every match of {@code pattern} in {@code str} with {@code str2} (interpreted as
     * a regex replacement string) and trims the result.
     */
    private static String replacePattern(String str, Pattern pattern, String str2) {
        return pattern.matcher(str).replaceAll(str2).trim();
    }

    /**
     * Applies each substitution in order: every occurrence of each "from" string is replaced
     * by the substitution's "to" string.
     *
     * <p>Both sides are treated literally. The previous implementation quoted only the search
     * string and handed the replacement to {@code String.replaceAll}, so a {@code '$'} or
     * {@code '\'} in the replacement was interpreted as a group reference or escape.
     *
     * @param str  input text
     * @param list substitutions to apply, in order
     * @return the text after all substitutions
     */
    public static String replaceStrings(String str, List<StringSubstitution> list) {
        String result = str;
        for (StringSubstitution substitution : list) {
            Iterator<String> sources = substitution.getFrom().iterator();
            while (sources.hasNext()) {
                result = result.replace(sources.next(), substitution.getTo());
            }
        }
        return result;
    }

    /**
     * Applies {@code function} to every element of {@code list} and concatenates the returned
     * lists, in order, into one list.
     *
     * <p>NOTE(review): the decompiled source referenced a synthetic lambda class
     * ({@code GeneralPreprocessor$$Lambda$1}) that is not available as source. Streaming each
     * returned list is the reading consistent with this method's signature; confirm against
     * the original bytecode.
     *
     * @param function maps one input string to zero or more output strings
     * @param list     input strings
     * @return the flattened results
     */
    public List<String> flatMap(Function<String, List<String>> function, List<String> list) {
        return list.stream()
                .flatMap(item -> function.apply(item).stream())
                .collect(Collectors.toList());
    }

    /**
     * Applies {@code function} to every element of {@code list}.
     *
     * @param function transformation applied to each string
     * @param list     input strings
     * @return a new list holding the transformed strings in the same order
     */
    public List<String> map(Function<String, String> function, List<String> list) {
        return list.stream().map(function).collect(Collectors.toList());
    }

    /**
     * Cleans {@code str} and removes, in this order, URL matches, e-mail matches and non-word
     * matches, trimming after each removal.
     *
     * @param str raw input text
     * @return the cleaned text as a single string
     */
    @Override // sec.bdc.tm.hte.eu.ngram.preprocessing.SourcePreprocessor
    public String preprocess(String str) {
        String text = preprocessNonBreaking(str);
        text = removePattern(text, GeneralConstants.URL_PATTERN);
        text = removePattern(text, GeneralConstants.EMAIL_PATTERN);
        return removePattern(text, GeneralConstants.NON_WORD_PATTERN);
    }

    /**
     * Cleans {@code str} and splits it into segments at breaking whitespace, URL and e-mail
     * matches (in that order); non-word matches are then removed from each segment without
     * splitting it further.
     *
     * @param str raw input text
     * @return the cleaned segments in their original order
     */
    @Override // sec.bdc.tm.hte.eu.ngram.preprocessing.SourcePreprocessor
    public List<String> preprocessBreaking(String str) {
        List<String> segments = Lists.newArrayList(preprocessNonBreaking(str));
        segments = removePatternBreaking(segments, GeneralConstants.BREAKING_WHITESPACE_PATTERN);
        segments = removePatternBreaking(segments, GeneralConstants.URL_PATTERN);
        segments = removePatternBreaking(segments, GeneralConstants.EMAIL_PATTERN);
        return removePattern(segments, GeneralConstants.NON_WORD_PATTERN);
    }
}
