package sec.bdc.tm.hte.eu.ngram.clustering;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.samsung.android.app.captureplugin.utils.SAUtils;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.EnumUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sec.bdc.nlp.Language;
import sec.bdc.nlp.exception.UnsupportedLanguageException;
import sec.bdc.tm.hte.eu.preprocessing.bnlp.stem.Stemmer;
import sec.bdc.tm.hte.eu.preprocessing.bnlp.stem.StemmerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: classes49.dex */
/**
 * Normalizes a set of strings and keeps the mapping from each original string to its
 * normalized form, so that clusters computed over normalized strings can later be
 * translated back into clusters of the original strings.
 *
 * <p>Normalization lower-cases the input, replaces a fixed set of punctuation characters
 * with spaces, collapses runs of spaces, rewrites the roman numerals ii..ix, optionally
 * applies a stemmer (only for the languages in {@code STEMMED_LANGUAGES}) and trims the
 * result.
 */
public class StringNormalizer {
    private static final Logger LOG = LoggerFactory.getLogger(StringNormalizer.class);
    private static final Set<Language> STEMMED_LANGUAGES = ImmutableSet.of(Language.en, Language.es);

    // Patterns are compiled once here instead of on every String.replaceAll call inside the loop.
    private static final Pattern SEPARATOR_CHARS = Pattern.compile("[_@#\\-,\"']");
    private static final Pattern SPACE_RUN = Pattern.compile(" +");
    private static final Pattern ROMAN_II = Pattern.compile("\\bii\\b");
    private static final Pattern ROMAN_III = Pattern.compile("\\biii\\b");
    private static final Pattern ROMAN_IV = Pattern.compile("\\biv\\b");
    private static final Pattern ROMAN_V = Pattern.compile("\\bv\\b");
    private static final Pattern ROMAN_VI = Pattern.compile("\\bvi\\b");
    private static final Pattern ROMAN_VII = Pattern.compile("\\bvii\\b");
    private static final Pattern ROMAN_VIII = Pattern.compile("\\bviii\\b");
    private static final Pattern ROMAN_IX = Pattern.compile("\\bix\\b");

    private final Set<String> normalizedStrings = Sets.newHashSet();
    private final Map<String, String> original2normalized = Maps.newHashMap();
    /** Stemmer applied during normalization, or {@code null} when stemming is not used. */
    private final Stemmer stemmer;

    /**
     * Creates a normalizer and immediately normalizes every string in {@code set}.
     *
     * @param set the original strings to normalize
     * @param language the language of the strings; a stemmer is only loaded when it is one of
     *     {@code STEMMED_LANGUAGES} and its lower-cased name is a valid {@link Language} constant
     */
    public StringNormalizer(Set<String> set, Language language) {
        // Locale.ROOT keeps the enum-name lookup independent of the default locale
        // (e.g. the Turkish dotless-i mapping).
        String lowerCase = language.toString().toLowerCase(Locale.ROOT);
        if (STEMMED_LANGUAGES.contains(language) && EnumUtils.isValidEnum(Language.class, lowerCase)) {
            LOG.info("Loading stemmer for string normalization for language {}.", lowerCase);
            this.stemmer = createStemmer(lowerCase);
        } else {
            LOG.info("No stemmer available for language '{}'. Stemming will not be used for normalization.", language);
            this.stemmer = null;
        }
        normalize(set);
    }

    /**
     * Creates the stemmer for the given language name.
     *
     * @return the stemmer, or {@code null} if the factory rejects the language; the failure is
     *     logged and normalization then proceeds without stemming
     */
    private static Stemmer createStemmer(String languageName) {
        try {
            return StemmerFactory.create(Language.valueOf(languageName));
        } catch (UnsupportedLanguageException e) {
            LOG.error("Error creating stemmer for " + languageName, e);
            return null;
        }
    }

    /**
     * Synthetic lambda body emitted by the compiler/desugarer: looks up, in {@code map}, the
     * value stored under the entry's value. Retained with its original signature because
     * generated lambda classes outside this file may still reference it.
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public static final /* synthetic */ Integer lambda$unnormalizeClusters$0$StringNormalizer(Map map, Map.Entry entry) {
        return (Integer) map.get(entry.getValue());
    }

    /** Normalizes every string in {@code set} and records both the result and the mapping to it. */
    private void normalize(Set<String> set) {
        for (String original : set) {
            String normalized = normalizeString(original);
            this.normalizedStrings.add(normalized);
            this.original2normalized.put(original, normalized);
        }
    }

    /** Applies the full normalization chain (see class comment) to a single string. */
    private String normalizeString(String original) {
        // Locale.ROOT: the roman-numeral patterns below match ASCII "i"/"v"/"x", so the
        // lower-casing must not depend on the default locale.
        String text = original.toLowerCase(Locale.ROOT);
        text = SEPARATOR_CHARS.matcher(text).replaceAll(StringUtils.SPACE);
        text = SPACE_RUN.matcher(text).replaceAll(StringUtils.SPACE);
        text = ROMAN_II.matcher(text).replaceAll("2");
        text = ROMAN_III.matcher(text).replaceAll("3");
        // NOTE(review): the three SAUtils constants below look like a decompiler substitution
        // for the literals "4", "5" and "6" - confirm their values before inlining literals.
        text = ROMAN_IV.matcher(text).replaceAll(SAUtils.TagBoardState.Search);
        text = ROMAN_V.matcher(text).replaceAll(SAUtils.TagBoardState.Detail);
        text = ROMAN_VI.matcher(text).replaceAll(SAUtils.TagBoardState.Edit);
        text = ROMAN_VII.matcher(text).replaceAll("7");
        text = ROMAN_VIII.matcher(text).replaceAll("8");
        text = ROMAN_IX.matcher(text).replaceAll("9");
        if (this.stemmer != null) {
            text = this.stemmer.stem(text);
        }
        return text.trim();
    }

    /**
     * Returns the normalized forms of all strings passed to the constructor.
     *
     * @return the internal (mutable) set of normalized strings
     */
    public Set<String> getNormalizedStrings() {
        return this.normalizedStrings;
    }

    /**
     * Translates clusters of normalized strings back into clusters of original strings.
     *
     * <p>Each original string is placed in the cluster that contains its normalized form. If a
     * normalized string occurs in several clusters, the cluster iterated last wins.
     *
     * @param set clusters of normalized strings
     * @return clusters of the original strings, one set per input cluster that has at least one
     *     original string mapped to it
     * @throws NullPointerException if the normalized form of some original string is not
     *     contained in any of the given clusters
     */
    public Set<Set<String>> unnormalizeClusters(Set<Set<String>> set) {
        // Assign each cluster a running index and remember it for every member.
        Map<String, Integer> clusterIdByNormalized = new HashMap<>();
        int clusterId = 0;
        for (Set<String> cluster : set) {
            for (String normalized : cluster) {
                clusterIdByNormalized.put(normalized, clusterId);
            }
            clusterId++;
        }
        // NOTE(review): reconstructed from decompiled synthetic lambdas; assumes they were
        // Map.Entry::getKey / Map.Entry::getValue / Map.Entry::getKey - the only choice
        // consistent with the method's types and return value.
        Map<Integer, Set<String>> originalsByClusterId = this.original2normalized.entrySet().stream()
                .collect(Collectors.groupingBy(
                        entry -> clusterIdByNormalized.get(entry.getValue()),
                        Collectors.mapping(Map.Entry::getKey, Collectors.toSet())));
        return Sets.newHashSet(originalsByClusterId.values());
    }
}
