package sec.bdc.tm.hte.eu.ngram.preprocessing;

import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import sec.bdc.tm.hte.eu.ngram.preprocessing.constants.TwitterConstants;

/* loaded from: classes49.dex */
/**
 * Preprocessor for Twitter text, layered on top of {@link GeneralPreprocessor}.
 *
 * <p>The non-breaking path ({@link #preprocess(String)}) runs the general preprocessing and then
 * strips the retweet pattern, cuts the text at a "cut word" match and applies the Twitter
 * substitutions. The breaking path ({@link #preprocessBreaking(String)}) post-processes the
 * segments produced by the superclass through three synthetic lambda instances.
 *
 * <p>Several static helpers are public although the decompiler reports they were originally
 * private; presumably they are invoked from the synthetic lambda classes referenced in
 * {@link #preprocessBreaking(String)} - confirm before narrowing their visibility.
 */
public class TwitterPreprocessor extends GeneralPreprocessor {
    /**
     * Applies the Twitter-specific, non-splitting clean-up steps to {@code str}.
     *
     * <p>Steps, in order: remove {@link TwitterConstants#RETWEET_PATTERN} via
     * {@code removePattern}, truncate with {@link #removeCutWords(String)}, then apply
     * {@link TwitterConstants#TWITTER_SUBSTITUTIONS} via {@code replaceStrings}.
     *
     * @param str text to clean
     * @return the cleaned text
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static String preprocessNonBreakingTwitter(String str) {
        String withoutRetweet = removePattern(str, TwitterConstants.RETWEET_PATTERN);
        String withoutCutWords = removeCutWords(withoutRetweet);
        return replaceStrings(withoutCutWords, TwitterConstants.TWITTER_SUBSTITUTIONS);
    }

    /**
     * Truncates {@code str} at the first match of {@link TwitterConstants#CUT_WORD_PATTERN}.
     *
     * <p>When the pattern matches, everything after the end of capture group 1 is dropped. The
     * result is trimmed whether or not the pattern matched.
     *
     * @param str text to truncate
     * @return the trimmed, possibly truncated text
     */
    private static String removeCutWords(String str) {
        Matcher matcher = TwitterConstants.CUT_WORD_PATTERN.matcher(str);
        // NOTE(review): assumes capture group 1 always participates in a match; otherwise
        // end(1) is -1 and substring throws - confirm against CUT_WORD_PATTERN.
        String kept = matcher.find() ? str.substring(0, matcher.end(1)) : str;
        // trim() is idempotent, so a single trim covers both the matched and unmatched case.
        return kept.trim();
    }

    /**
     * Splits {@code str} around matches of {@link TwitterConstants#HASHTAG_SEQUENCE_PATTERN}.
     *
     * <p>The text between consecutive matches (and after the last one) is handed to
     * {@code addIfNonempty}. Each matched sequence is itself split with
     * {@link TwitterConstants#WHITESPACE_OR_START_REGEX}; every piece is trimmed and added when
     * it is not empty.
     *
     * @param str text to split
     * @return the collected segments, in order of appearance
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static List<String> splitOnHashtagSequences(String str) {
        // Kept as the concrete ArrayList type: the parameter type of the inherited
        // addIfNonempty helper is not visible from this class.
        ArrayList<String> segments = Lists.newArrayList();
        Matcher matcher = TwitterConstants.HASHTAG_SEQUENCE_PATTERN.matcher(str);
        while (matcher.find()) {
            // A fresh buffer per match receives only the text between the previous match
            // and this one; the empty replacement drops the match itself.
            StringBuffer preceding = new StringBuffer();
            matcher.appendReplacement(preceding, "");
            addIfNonempty(segments, preceding);
            for (String piece : matcher.group(0).split(TwitterConstants.WHITESPACE_OR_START_REGEX)) {
                String trimmed = piece.trim();
                if (!trimmed.isEmpty()) {
                    segments.add(trimmed);
                }
            }
        }
        // Whatever follows the last match (or the whole input when nothing matched).
        StringBuffer tail = new StringBuffer();
        matcher.appendTail(tail);
        addIfNonempty(segments, tail);
        return segments;
    }

    /**
     * Splits {@code str} around matches of {@link TwitterConstants#SPAN_SPLIT_PATTERN},
     * discarding the matched text.
     *
     * <p>The text between consecutive matches (and after the last one) is handed to
     * {@code addIfNonempty}.
     *
     * @param str text to split
     * @return the collected segments, in order of appearance
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static List<String> splitOnSpanSplitCharacters(String str) {
        // Kept as the concrete ArrayList type: the parameter type of the inherited
        // addIfNonempty helper is not visible from this class.
        ArrayList<String> segments = Lists.newArrayList();
        Matcher matcher = TwitterConstants.SPAN_SPLIT_PATTERN.matcher(str);
        while (matcher.find()) {
            // A fresh buffer per match receives only the text between the previous match
            // and this one; the empty replacement drops the match itself.
            StringBuffer preceding = new StringBuffer();
            matcher.appendReplacement(preceding, "");
            addIfNonempty(segments, preceding);
        }
        // Whatever follows the last match (or the whole input when nothing matched).
        StringBuffer tail = new StringBuffer();
        matcher.appendTail(tail);
        addIfNonempty(segments, tail);
        return segments;
    }

    /**
     * Runs the superclass preprocessing and then the Twitter-specific non-breaking clean-up.
     *
     * @param str raw text
     * @return the preprocessed text
     */
    @Override // sec.bdc.tm.hte.eu.ngram.preprocessing.GeneralPreprocessor, sec.bdc.tm.hte.eu.ngram.preprocessing.SourcePreprocessor
    public String preprocess(String str) {
        return preprocessNonBreakingTwitter(super.preprocess(str));
    }

    /**
     * Runs the superclass breaking preprocessing and post-processes its segments.
     *
     * <p>The segments pass through two {@code flatMap} stages and a final {@code map} stage,
     * each driven by a synthetic lambda instance whose body is not visible in this file.
     * NOTE(review): these presumably correspond to the public static helpers of this class
     * (the two split methods, then {@link #preprocessNonBreakingTwitter(String)}) - verify the
     * exact mapping against the lambda classes.
     *
     * @param str raw text
     * @return the post-processed segments
     */
    @Override // sec.bdc.tm.hte.eu.ngram.preprocessing.GeneralPreprocessor, sec.bdc.tm.hte.eu.ngram.preprocessing.SourcePreprocessor
    public List<String> preprocessBreaking(String str) {
        return map(TwitterPreprocessor$$Lambda$2.$instance, flatMap(TwitterPreprocessor$$Lambda$1.$instance, flatMap(TwitterPreprocessor$$Lambda$0.$instance, super.preprocessBreaking(str))));
    }
}
