package sec.bdc.tm.hte.eu.ngram.postprocessing;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sec.bdc.tm.hte.eu.ngram.structures.Keyphrase;
import sec.bdc.tm.hte.eu.ngram.tokenizer.Tokenizer;

/* loaded from: classes49.dex */
/**
 * Post-processes a scored set of {@link Keyphrase}s: replaces each phrase text with the
 * surface form ("orth") selected from the source text lines and, optionally, trims
 * over-long phrases and merges the duplicates that trimming may create.
 *
 * <p>This class was recovered by a decompiler. The {@code Postprocessor$$Lambda$N} classes it
 * references are synthetic lambda holders declared outside this file; notes about what they do
 * are assumptions and are marked as such.
 */
public class Postprocessor {
    private static final Logger LOG = LoggerFactory.getLogger(Postprocessor.class);

    /**
     * Once more than this many surface-form samples have been collected for a phrase, its
     * pattern is dropped and the phrase is no longer searched for in subsequent lines.
     */
    private static final int PRETTY_PRINT_SKIP_COUNT = 10;

    /** Token appended in place of the words cut off by {@link #trimTooLongPhrases}. */
    private static final String ELLIPSIS = "...";

    /**
     * Collects, for every phrase in {@code map}, the surface forms under which it occurs in the
     * given lines.
     *
     * @param stream lines of text to scan; consumed by this call.
     *               NOTE(review): the result map is a plain {@link HashMap}, so this assumes a
     *               sequential stream - confirm with callers.
     * @param map    phrase to search pattern; entries are removed from this map by
     *               {@link #findOrths} once a phrase has enough samples
     * @return phrase to multiset of matched surface forms; phrases that never matched are absent
     */
    private Map<String, Multiset<String>> calculateOrths(Stream<String> stream, final Map<String, Pattern> map) {
        final Map<String, Multiset<String>> orthsByPhrase = new HashMap<>();
        // A fresh iterator per line: findOrths removes saturated patterns through it.
        stream.forEach(line -> findOrths(orthsByPhrase, line, map.entrySet().iterator()));
        return orthsByPhrase;
    }

    /**
     * Chooses one display form per phrase from the surface forms found in the text.
     *
     * @param set    phrase texts to look for
     * @param stream lines of text to scan
     * @return phrase to selected surface form, for every phrase that matched at least once
     */
    private Map<String, String> calculatePrettyPrint(Set<String> set, Stream<String> stream) {
        Map<String, Multiset<String>> orthsByPhrase = calculateOrths(stream, mapPhrases2Patterns(set));
        Map<String, String> bestOrthByPhrase = new HashMap<>();
        for (Map.Entry<String, Multiset<String>> entry : orthsByPhrase.entrySet()) {
            String phrase = entry.getKey();
            // NOTE(review): Lambda$5 is declared elsewhere; presumably it extracts the entry's
            // occurrence count so that the most frequent surface form wins - confirm.
            Optional<Multiset.Entry<String>> best = entry.getValue().entrySet().stream().max(Comparator.comparing(Postprocessor$$Lambda$5.$instance));
            // Fall back to the phrase itself if no surface form is available.
            bestOrthByPhrase.put(phrase, best.map(Multiset.Entry::getElement).orElse(phrase));
        }
        return bestOrthByPhrase;
    }

    /**
     * Rewrites the text of every keyphrase to the surface form selected by
     * {@link #calculatePrettyPrint}; keyphrases without a match keep their current text.
     *
     * @param set    keyphrases to update in place
     * @param stream lines of text to scan for surface forms
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void changeToBestOrths(Set<Keyphrase> set, Stream<String> stream) {
        // NOTE(review): Lambda$0 is declared elsewhere; presumably Keyphrase::getText, since the
        // resulting map is looked up by getText() below - confirm. The raw cast is kept because
        // the static type of the lambda holder is not visible here.
        Set<String> phraseTexts = (Set) set.stream().map(Postprocessor$$Lambda$0.$instance).collect(Collectors.toSet());
        Map<String, String> bestOrthByText = calculatePrettyPrint(phraseTexts, stream);
        for (Keyphrase keyphrase : set) {
            String current = keyphrase.getText();
            keyphrase.setText(bestOrthByText.getOrDefault(current, current));
        }
    }

    /**
     * Matches one line of text against the remaining phrase patterns and records the captured
     * surface form of every phrase that matches.
     *
     * <p>The line is normalized once via {@code Tokenizer.normalize(str, false)} and each pattern
     * must match the whole normalized line; capture group 1 is stored as the surface form. When a
     * phrase has collected more than {@link #PRETTY_PRINT_SKIP_COUNT} samples, its pattern is
     * removed through {@code it}, which also removes it from the collection backing the iterator.
     *
     * @param map accumulator: phrase to multiset of surface forms seen so far (modified)
     * @param str raw line of text
     * @param it  iterator over phrase/pattern entries; must support {@link Iterator#remove()}
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static void findOrths(Map<String, Multiset<String>> map, String str, Iterator<Map.Entry<String, Pattern>> it) {
        if (!it.hasNext()) {
            // Nothing left to search for: skip normalization altogether.
            return;
        }
        // The normalized line does not depend on the pattern, so compute it once per line.
        CharSequence normalized = Tokenizer.normalize(str, false);
        while (it.hasNext()) {
            Map.Entry<String, Pattern> candidate = it.next();
            Matcher matcher = candidate.getValue().matcher(normalized);
            if (!matcher.matches()) {
                continue;
            }
            Multiset<String> orths = map.computeIfAbsent(candidate.getKey(), phrase -> HashMultiset.<String>create());
            orths.add(matcher.group(1));
            if (orths.size() > PRETTY_PRINT_SKIP_COUNT) {
                // Enough samples collected: stop searching for this phrase.
                it.remove();
            }
        }
    }

    /**
     * Synthetic lambda body kept for the external lambda holder classes: returns its argument
     * unchanged.
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public static final /* synthetic */ Keyphrase lambda$mergeDuplicates$0$Postprocessor(Keyphrase keyphrase) {
        return keyphrase;
    }

    /**
     * Synthetic lambda body kept for the external lambda holder classes: tells whether a group
     * holds more than one element.
     */
    /* JADX INFO: Access modifiers changed from: package-private */
    public static final /* synthetic */ boolean lambda$mergeDuplicates$1$Postprocessor(Set set) {
        return set.size() > 1;
    }

    /**
     * Builds one case-insensitive search pattern per phrase.
     *
     * <p>The phrase is split on single spaces, each token is transformed and the tokens are
     * joined with {@code \s*}; the result is wrapped as {@code .*( ... ).*} so that group 1
     * captures the phrase as it appears in the text.
     *
     * @param set phrase texts
     * @return a new mutable map from phrase to compiled pattern
     */
    private Map<String, Pattern> mapPhrases2Patterns(Set<String> set) {
        Map<String, Pattern> patternsByPhrase = new HashMap<>();
        for (String phrase : set) {
            // NOTE(review): Lambda$7 is declared elsewhere; presumably it regex-escapes each
            // token - confirm. The expression keeps its decompiled shape and casts because the
            // static type of the lambda holder is not visible here.
            String tokensRegex = String.join("\\s*", (Iterable<? extends CharSequence>) Stream.of((Object[]) phrase.split(StringUtils.SPACE)).map(Postprocessor$$Lambda$7.$instance).collect(Collectors.toList()));
            patternsByPhrase.put(phrase, Pattern.compile(".*(" + tokensRegex + ").*", Pattern.CASE_INSENSITIVE));
        }
        return patternsByPhrase;
    }

    /**
     * Merges groups of duplicate keyphrases into a single representative per group.
     *
     * <p>For every group with more than one member, the maximal member (per the comparator key)
     * is kept; the count and the score of each other member are added to it and that member is
     * removed from {@code map}.
     *
     * @param map keyphrase to score; modified in place
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void mergeDuplicates(Map<Keyphrase, Double> map) {
        // NOTE(review): Lambda$1..$3 are declared elsewhere. Presumably $1 is the grouping key
        // (likely the phrase text), $2 the identity mapping and $3 the "size > 1" filter backed
        // by the synthetic methods in this class - confirm. Raw casts are kept because the
        // static types of the lambda holders are not visible here.
        Set<Set<Keyphrase>> duplicateGroups = (Set) ((Map) map.keySet().stream().collect(Collectors.groupingBy(Postprocessor$$Lambda$1.$instance, Collectors.mapping(Postprocessor$$Lambda$2.$instance, Collectors.toSet())))).values().stream().filter(Postprocessor$$Lambda$3.$instance).collect(Collectors.toSet());
        int removed = 0;
        for (Set<Keyphrase> group : duplicateGroups) {
            // NOTE(review): Lambda$4 supplies the comparison key and is declared elsewhere.
            Optional<Keyphrase> best = group.stream().max(Comparator.comparing(Postprocessor$$Lambda$4.$instance));
            if (!best.isPresent()) {
                continue;
            }
            Keyphrase keeper = best.get();
            for (Keyphrase duplicate : group) {
                if (duplicate.equals(keeper)) {
                    continue;
                }
                // Fold the duplicate's score and count into the keeper, then drop the duplicate.
                Double duplicateScore = map.remove(duplicate);
                keeper.setCount(keeper.getCount() + duplicate.getCount());
                map.merge(keeper, duplicateScore, Double::sum);
                removed++;
            }
        }
        LOG.info("{} duplicate sets after phrase trimming merged, {} phrases removed.", duplicateGroups.size(), removed);
    }

    /**
     * Shortens every phrase that has more than {@code i} space-separated words to its first
     * {@code i - 1} words followed by {@code "..."}.
     *
     * <p>A limit below 1 cannot be honoured (there is no room even for the ellipsis), so in that
     * case nothing is trimmed and a warning is logged instead of failing.
     *
     * @param set keyphrases to trim in place
     * @param i   maximum number of tokens a phrase may keep, ellipsis included
     */
    private static void trimTooLongPhrases(Set<Keyphrase> set, int i) {
        if (i < 1) {
            LOG.warn("Skipping phrase trimming: word limit must be at least 1 but was {}.", i);
            return;
        }
        int trimmed = 0;
        for (Keyphrase keyphrase : set) {
            String[] words = keyphrase.getText().split(StringUtils.SPACE);
            if (words.length > i) {
                String shortened = Stream.concat(Stream.of(words).limit(i - 1), Stream.of(ELLIPSIS))
                        .collect(Collectors.joining(StringUtils.SPACE));
                keyphrase.setText(shortened);
                trimmed++;
            }
        }
        LOG.info("Trimmed {} phrases longer than {} words.", trimmed, i);
    }

    /**
     * Entry point: prettifies the phrase texts and optionally trims and de-duplicates them.
     *
     * @param map    keyphrase to score; keys are modified in place and entries may be removed
     * @param stream lines of the source text, used to find the surface form of each phrase
     * @param i      maximum number of words per phrase; only used when {@code z} is {@code true}
     * @param z      whether to trim over-long phrases and merge the resulting duplicates
     */
    public void postprocessPhrases(Map<Keyphrase, Double> map, Stream<String> stream, int i, boolean z) {
        changeToBestOrths(map.keySet(), stream);
        if (z) {
            trimTooLongPhrases(map.keySet(), i);
            mergeDuplicates(map);
        }
    }
}
