package sec.bdc.tm.hte.eu.domain.vectorizer;

import com.google.common.collect.EvictingQueue;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import sec.bdc.tm.hte.eu.domain.feature.Feature;
import sec.bdc.tm.hte.eu.domain.feature.ListNGram;
import sec.bdc.tm.hte.eu.domain.vector.FeatureVector;
import sec.bdc.tm.hte.eu.domain.vector.MapFeatureVector;

/* loaded from: classes49.dex */
/**
 * Vectorizer that turns a tokenized document (a list of string tokens) into a
 * feature vector of n-gram counts.
 *
 * <p>{@link #fit(List)} builds the vocabulary: every n-gram whose document
 * count lies within {@code [minDocCount, maxDocCount]} is assigned an index.
 * {@link #transform(List)} then counts the occurrences of vocabulary n-grams in
 * a single document and, when sublinear scaling is enabled, replaces each count
 * {@code c} with {@code ln(c) + 1}.
 */
public class NgramVectorizer implements Vectorizer<List<String>> {
    private static final int DEFAULT_MAX_DOC_COUNT = 100;
    private static final int DEFAULT_MAX_N = 2;
    private static final int DEFAULT_MIN_DOC_COUNT = 3;
    private static final int DEFAULT_MIN_N = 1;
    private static final boolean DEFAULT_SUBLINEAR_TF = true;
    private static final long serialVersionUID = 1;

    /** Vocabulary: n-gram feature mapped to the index passed to the feature vector. */
    private final Map<Feature, Integer> feature2index;
    /** N-grams seen in more than this many fitted documents are dropped. */
    private final int maxDocCount;
    /** Longest n-gram length produced; also the capacity of the sliding window. */
    private final int maxN;
    /** N-grams seen in fewer than this many fitted documents are dropped. */
    private final int minDocCount;
    /** Shortest n-gram length produced. */
    private final int minN;
    /** Whether {@link #transform(List)} applies {@code ln(count) + 1} scaling. */
    private final boolean sublinearTf;

    /** Creates a vectorizer configured with the {@code DEFAULT_*} constants of this class. */
    public NgramVectorizer() {
        this(DEFAULT_MIN_N, DEFAULT_MAX_N, DEFAULT_MIN_DOC_COUNT, DEFAULT_MAX_DOC_COUNT, DEFAULT_SUBLINEAR_TF);
    }

    /**
     * Creates a vectorizer with an empty vocabulary.
     *
     * @param minN        shortest n-gram length to generate
     * @param maxN        longest n-gram length to generate
     * @param minDocCount minimum number of documents an n-gram must occur in to be kept by {@code fit}
     * @param maxDocCount maximum number of documents an n-gram may occur in to be kept by {@code fit}
     * @param sublinearTf whether {@code transform} rescales counts to {@code ln(count) + 1}
     */
    public NgramVectorizer(int minN, int maxN, int minDocCount, int maxDocCount, boolean sublinearTf) {
        this.feature2index = Maps.newHashMap();
        this.minN = minN;
        this.maxN = maxN;
        this.minDocCount = minDocCount;
        this.maxDocCount = maxDocCount;
        this.sublinearTf = sublinearTf;
    }

    /**
     * Generates every n-gram of length {@code minN..maxN} in the given token
     * sequence, in order of the position of the n-gram's last token. Repeated
     * n-grams appear once per occurrence.
     *
     * @param list tokens of one document
     * @return all generated n-gram features, duplicates included
     */
    private List<Feature> collectFeatures(List<String> list) {
        List<Feature> ngrams = Lists.newArrayList();
        // Sliding window over the last maxN tokens; older tokens are evicted automatically.
        EvictingQueue<Feature> window = EvictingQueue.create(this.maxN);
        for (String token : list) {
            window.add(lambda$collectFeatures$1$NgramVectorizer(token));
            // Fresh snapshot per token so the sub-list views handed to ListNGram
            // are never affected by later window updates.
            List<Feature> snapshot = Lists.newArrayList(window);
            int size = snapshot.size();
            for (int n = this.minN; n <= size; n++) {
                ngrams.add(new ListNGram(snapshot.subList(size - n, size)));
            }
        }
        return ngrams;
    }

    /**
     * Adds one to the count of {@code feature} in {@code featureVector}.
     * Features that are not part of the fitted vocabulary are ignored.
     */
    private void increaseFeatureCount(FeatureVector featureVector, Feature feature) {
        Integer index = this.feature2index.get(feature);
        if (index == null) {
            return;
        }
        double updated = featureVector.hasValue(feature) ? featureVector.get(feature) + 1.0d : 1.0d;
        featureVector.put(feature, index.intValue(), updated);
    }

    /**
     * Wraps a single token into a {@link ListNGram}.
     *
     * <p>Compiler-generated lambda body; kept public under its generated name
     * because generated companion classes may still reference it.
     */
    public static final /* synthetic */ ListNGram lambda$collectFeatures$1$NgramVectorizer(String str) {
        return new ListNGram(str);
    }

    /** Replaces every stored value {@code v} of the vector with {@code ln(v) + 1}. */
    private void scaleSublinearTf(FeatureVector featureVector) {
        for (Map.Entry<Feature, Double> entry : featureVector.getFeature2Value().entrySet()) {
            entry.setValue(Double.valueOf(Math.log(entry.getValue().doubleValue()) + 1.0d));
        }
    }

    /**
     * Learns the vocabulary from the given documents.
     *
     * <p>Each n-gram is counted once per document; n-grams whose document count
     * is below {@code minDocCount} or above {@code maxDocCount} are discarded.
     * Surviving n-grams not yet in the vocabulary receive the next free index,
     * so calling {@code fit} again extends the vocabulary without reusing
     * indices that are already assigned.
     *
     * @param list tokenized documents
     */
    @Override // sec.bdc.tm.hte.eu.domain.vectorizer.Vectorizer
    public void fit(List<List<String>> list) {
        Multiset<Feature> docCounts = HashMultiset.create();
        for (List<String> document : list) {
            // De-duplicate within the document so the multiset holds document counts.
            docCounts.addAll(new HashSet<>(collectFeatures(document)));
        }
        // Removing from the element set drops all occurrences of the feature.
        docCounts.elementSet().removeIf(feature -> lambda$fit$0$NgramVectorizer(docCounts, feature));

        // Continue after the indices already handed out; restarting at 0 would
        // collide with entries kept from an earlier fit.
        int nextIndex = this.feature2index.size();
        for (Feature feature : docCounts.elementSet()) {
            if (!this.feature2index.containsKey(feature)) {
                this.feature2index.put(feature, Integer.valueOf(nextIndex));
                nextIndex++;
            }
        }
    }

    /**
     * Tells whether {@code feature} must be dropped from the vocabulary because
     * its count in {@code multiset} is outside {@code [minDocCount, maxDocCount]}.
     *
     * <p>Compiler-generated lambda body; kept public under its generated name
     * because generated companion classes may still reference it.
     */
    public final /* synthetic */ boolean lambda$fit$0$NgramVectorizer(Multiset<?> multiset, Feature feature) {
        int docCount = multiset.count(feature);
        return docCount < this.minDocCount || docCount > this.maxDocCount;
    }

    /**
     * Converts one tokenized document into a feature vector of n-gram counts
     * restricted to the fitted vocabulary, applying sublinear scaling when it
     * is enabled.
     *
     * @param list tokens of the document
     * @return the feature vector for the document
     */
    @Override // sec.bdc.tm.hte.eu.domain.vectorizer.Vectorizer
    public FeatureVector transform(List<String> list) {
        MapFeatureVector vector = new MapFeatureVector(this.feature2index.size());
        for (Feature feature : collectFeatures(list)) {
            increaseFeatureCount(vector, feature);
        }
        if (this.sublinearTf) {
            scaleSublinearTf(vector);
        }
        return vector;
    }
}
