package sec.bdc.tm.hte.eu.ngram.tokenizer;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.StringUtils;
import sec.bdc.tm.hte.eu.ngram.NgramException;

/**
 * Splits a string into tokens and provides a shared clean-up step for those tokens.
 *
 * <p>Implementations supply {@link #tokenize(String)}; {@link #tokenizeNormalize(String)} is built
 * on top of it and {@link #normalize(String, boolean)} is the per-token clean-up it applies.
 */
@FunctionalInterface
/* loaded from: classes49.dex */
public interface Tokenizer {
    /**
     * Cleans up a single piece of text: optionally lowercases it, turns every {@code '@'} and
     * {@code '#'} into a space, squeezes consecutive spaces into one, and trims the ends.
     *
     * <p>Lowercasing uses {@link Locale#ROOT} so the result does not depend on the default locale
     * of the device (for example, {@code "I"} always becomes {@code "i"}).
     *
     * @param str the text to clean up; must not be {@code null}
     * @param z   {@code true} to lowercase the text before the other steps
     * @return the cleaned text, possibly empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    static String normalize(String str, boolean z) {
        String text = z ? str.toLowerCase(Locale.ROOT) : str;
        // Single pass: each maximal run of '@', '#' and spaces becomes one space. This yields the
        // same string as replacing "[@#]" with a space and then collapsing " +".
        return text.replaceAll("[@# ]+", StringUtils.SPACE).trim();
    }

    /**
     * Splits the given text into tokens.
     *
     * @param str the text to split
     * @return the tokens produced by the implementation
     * @throws NgramException if the implementation fails to tokenize the text
     */
    List<String> tokenize(String str) throws NgramException;

    /**
     * Tokenizes the text with {@link #tokenize(String)}, normalizes every token with lowercasing
     * enabled, and drops tokens that end up empty.
     *
     * <p>A {@code null} result from {@link #tokenize(String)} is treated as "no tokens", and
     * {@code null} elements inside the result are skipped.
     *
     * @param str the text to split
     * @return a new mutable list of the non-empty normalized tokens, in their original order
     * @throws NgramException if {@link #tokenize(String)} fails
     */
    default List<String> tokenizeNormalize(String str) throws NgramException {
        List<String> tokens = tokenize(str);
        if (tokens == null) {
            return new ArrayList<>();
        }
        List<String> normalizedTokens = new ArrayList<>(tokens.size());
        for (String token : tokens) {
            if (token == null) {
                continue;
            }
            String normalized = normalize(token, true);
            if (!normalized.isEmpty()) {
                normalizedTokens.add(normalized);
            }
        }
        return normalizedTokens;
    }
}
