package sec.bdc.tm.hte.eu.ngram;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sec.bdc.nlp.Language;
import sec.bdc.nlp.exception.NLPModuleException;
import sec.bdc.nlp.exception.ResourceLoadingException;
import sec.bdc.nlp.exception.UnsupportedLanguageException;
import sec.bdc.tm.hte.common.ds.HashTag;
import sec.bdc.tm.hte.common.intf.HashTagExtractor;
import sec.bdc.tm.hte.eu.ngram.interfaces.Ranker;
import sec.bdc.tm.hte.eu.ngram.postprocessing.Postprocessor;
import sec.bdc.tm.hte.eu.ngram.ranking.RankerLoader;
import sec.bdc.tm.hte.eu.ngram.settings.NgramExtractorSettings;
import sec.bdc.tm.hte.eu.ngram.structures.Document;
import sec.bdc.tm.hte.eu.ngram.structures.Keyphrase;
import sec.bdc.tm.hte.eu.ngram.writer.KPEResult;
import sec.bdc.tm.hte.eu.ngram.writer.KPEValue;
import sec.bdc.tm.hte.eu.ngram.writer.KPEWord;
import sec.bdc.tm.hte.eu.pipeline.detector.patternmatcher.PatternMatcher;
import sec.bdc.tm.hte.eu.preprocessing.bnlp.BasicNLPAnalyzerFactoryEu;
import sec.bdc.tm.hte.eu.preprocessing.resource.ResourceObject;

/* loaded from: classes49.dex */
/**
 * {@link HashTagExtractor} that extracts candidate key phrases from a text, ranks them,
 * keeps the top-ranked ones, post-processes them and returns them as {@link HashTag}s.
 *
 * <p>This source was recovered by a decompiler. The {@code lambda$...} methods and the
 * {@code NgramExtractor$$Lambda$N} references are compiler-generated artifacts; their
 * names and signatures are kept so that generated companion classes keep linking.
 */
public class NgramExtractor implements HashTagExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(NgramExtractor.class);
    private final PatternMatcher patternMatcher;
    private final CorpusPhraseExtractor phraseExtractor;
    private final Postprocessor postprocessor;
    private final Ranker ranker;
    private final NgramExtractorSettings settings;

    /**
     * Builds an extractor whose settings, pattern matcher and NLP analyzer are created
     * from the given language.
     *
     * @param language language used to initialise every component
     * @throws NLPModuleException wrapping any I/O, resource-loading, unsupported-language
     *     or n-gram failure raised while the components are created
     */
    public NgramExtractor(Language language) throws NLPModuleException {
        try {
            this.settings = new NgramExtractorSettings(language);
            this.patternMatcher = new PatternMatcher(language);
            this.phraseExtractor = new CorpusPhraseExtractor(this.settings, new BasicNLPAnalyzerFactoryEu().getInstance(language));
            this.ranker = RankerLoader.loadRanker(this.settings);
            this.postprocessor = new Postprocessor();
        } catch (IOException | ResourceLoadingException | UnsupportedLanguageException | NgramException e) {
            // Callers only deal with NLPModuleException; the original failure is kept as cause.
            throw new NLPModuleException(e);
        }
    }

    /**
     * Builds an extractor whose settings, pattern matcher and NLP analyzer are created
     * from the given resource object.
     *
     * @param resourceObject resource holder used to initialise every component
     * @throws NLPModuleException wrapping any I/O, resource-loading, unsupported-language
     *     or n-gram failure raised while the components are created
     */
    public NgramExtractor(ResourceObject resourceObject) throws NLPModuleException {
        try {
            this.settings = new NgramExtractorSettings(resourceObject);
            this.patternMatcher = new PatternMatcher(resourceObject);
            this.phraseExtractor = new CorpusPhraseExtractor(this.settings, new BasicNLPAnalyzerFactoryEu().getInstance(resourceObject), resourceObject);
            this.ranker = RankerLoader.loadRanker(this.settings);
            this.postprocessor = new Postprocessor();
        } catch (IOException | ResourceLoadingException | UnsupportedLanguageException | NgramException e) {
            // Callers only deal with NLPModuleException; the original failure is kept as cause.
            throw new NLPModuleException(e);
        }
    }

    /**
     * Converts an extraction result into hash tags: the result values are flattened,
     * filtered through {@link #lambda$convert$1$NgramExtractor(KPEWord)} and mapped to
     * {@link HashTag}s.
     *
     * <p>NOTE(review): the filter KEEPS the words for which {@code PatternMatcher.exclude}
     * returns {@code true}. Confirm against PatternMatcher that this is the intended
     * polarity; it is left exactly as decompiled.
     *
     * @param kPEResult result whose values are converted
     * @return the hash tags built from the words that pass the filter
     */
    // The flatMap/map functions are generated singletons whose generic types are not
    // visible here, so the pipeline is (as in the decompiled original) partly raw.
    @SuppressWarnings({"unchecked", "rawtypes"})
    private List<HashTag> convert(KPEResult kPEResult) {
        return (List) kPEResult.getValues().stream()
                .flatMap(NgramExtractor$$Lambda$1.$instance)
                // A lambda replaces the decompiler's anonymous Predicate, which passed a
                // constructor argument to an interface and therefore was not valid Java.
                .filter(word -> lambda$convert$1$NgramExtractor((KPEWord) word))
                .map(NgramExtractor$$Lambda$3.$instance)
                .collect(Collectors.toList());
    }

    /**
     * Compiler-generated lambda body: builds a {@link HashTag} from a word's text and weight.
     * Presumably invoked by the generated {@code NgramExtractor$$Lambda$3} class (TODO confirm),
     * hence the widened access and the preserved synthetic name.
     *
     * @param kPEWord word to convert
     * @return hash tag carrying the word's text and weight
     */
    public static final /* synthetic */ HashTag lambda$convert$2$NgramExtractor(KPEWord kPEWord) {
        return new HashTag(kPEWord.getText(), kPEWord.getWeight());
    }

    /**
     * Runs the whole pipeline on a single text: phrase extraction, ranking, selection of
     * the top-ranked phrases, post-processing and conversion to hash tags.
     *
     * @param str text to extract hash tags from; it is wrapped into a single {@link Document}
     * @return the hash tags produced by {@link #convert(KPEResult)}
     * @throws NLPModuleException wrapping any {@link NgramException} raised by the pipeline
     */
    @Override // sec.bdc.tm.hte.common.intf.HashTagExtractor
    public List<HashTag> extract(String str) throws NLPModuleException {
        LOG.info("Keyphrase extraction and ranking started.");
        try {
            // The pipeline works on document collections; the input text is the only document.
            List<Document> documents = new ArrayList<>();
            documents.add(new Document(str));

            Set<Keyphrase> extractPhrases = this.phraseExtractor.extractPhrases(documents.stream());
            LOG.info("Extracted {} phrases from foreground corpus ({} documents).", extractPhrases.size(), documents.size());

            LOG.info("Ranking {} keyphrases with ranker: {}", extractPhrases.size(), this.ranker.getClass().getSimpleName());
            Map<Keyphrase, Double> calculateWeights = this.ranker.calculateWeights(extractPhrases);

            // Keep only the phrases selected by KPEValue.getTopSortedKeyphrases, with their weights.
            HashSet<Keyphrase> topPhrases = Sets.newHashSet(KPEValue.getTopSortedKeyphrases(calculateWeights, this.settings));
            HashMap<Keyphrase, Double> topWeights = Maps.newHashMap();
            for (Keyphrase keyphrase : topPhrases) {
                topWeights.put(keyphrase, calculateWeights.get(keyphrase));
            }
            LOG.info("Removed {} keyphrases not matching output constraints.", extractPhrases.size() - topPhrases.size());

            LOG.info("Postprocessing {} keyphrases.", topWeights.size());
            this.postprocessor.postprocessPhrases(topWeights, documents.stream().map(NgramExtractor$$Lambda$0.$instance), this.settings.getOutputMaxWords(), this.settings.getNgramTrim());

            LOG.info("Writing result.");
            return convert(new KPEResult(topWeights, this.settings));
        } catch (NgramException e) {
            throw new NLPModuleException(e);
        }
    }

    /**
     * Compiler-generated lambda body used as the filter in {@link #convert(KPEResult)}:
     * returns whatever {@code PatternMatcher.exclude} returns for the word's text.
     *
     * @param kPEWord word whose text is checked
     * @return the result of {@code patternMatcher.exclude(kPEWord.getText())}
     */
    public final /* synthetic */ boolean lambda$convert$1$NgramExtractor(KPEWord kPEWord) {
        return this.patternMatcher.exclude(kPEWord.getText());
    }
}
