package sec.bdc.tm.hte.eu.pipeline;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import sec.bdc.nlp.Language;
import sec.bdc.nlp.exception.NLPModuleException;
import sec.bdc.tm.hte.common.ds.HashTag;
import sec.bdc.tm.hte.common.intf.HashTagExtractor;
import sec.bdc.tm.hte.eu.pipeline.detector.PhraseDetector;
import sec.bdc.tm.hte.eu.pipeline.ranker.MLBasedRanker;
import sec.bdc.tm.hte.eu.pipeline.ranker.PipelineRanker;
import sec.bdc.tm.hte.eu.pipeline.ranker.RankerType;
import sec.bdc.tm.hte.eu.preprocessing.PreprocessedSentence;
import sec.bdc.tm.hte.eu.preprocessing.Preprocessor;
import sec.bdc.tm.hte.eu.preprocessing.resource.ResourceObject;
import sec.bdc.tm.hte.eu.ranker.Ranker;
import sec.bdc.tm.hte.eu.ranker.ScoredPhrase;

/* loaded from: classes49.dex */
/**
 * {@link HashTagExtractor} that chains three stages: a {@link Preprocessor}, a
 * {@link PhraseDetector} and a {@link Ranker}. The ranked phrases are converted to
 * {@link HashTag}s and then filtered by {@link #postprocess(List)}.
 */
public class PipelineExtractor implements HashTagExtractor {
    private final PhraseDetector phraseDetector;
    private final Preprocessor preprocessor;
    private final Ranker ranker;

    /**
     * Builds the pipeline from language-based components.
     *
     * @param language   language handed to the preprocessor, the phrase detector and,
     *                   for {@code ML_BASED_RANKER}, the ranker
     * @param rankerType which {@link Ranker} implementation to instantiate
     * @throws NLPModuleException       propagated from constructing the pipeline components
     * @throws IllegalArgumentException if {@code rankerType} is not a supported type
     */
    public PipelineExtractor(Language language, RankerType rankerType) throws NLPModuleException {
        this(new Preprocessor(language), new PhraseDetector(language), getRanker(rankerType, language));
    }

    /**
     * Builds the pipeline from already-constructed components.
     *
     * @param preprocessor   component used to split/prepare the input text
     * @param phraseDetector component that adds phrases to the preprocessed sentences
     * @param ranker         component that scores the phrases
     */
    public PipelineExtractor(Preprocessor preprocessor, PhraseDetector phraseDetector, Ranker ranker) {
        this.preprocessor = preprocessor;
        this.phraseDetector = phraseDetector;
        this.ranker = ranker;
    }

    /**
     * Builds the pipeline from resource-based components.
     *
     * @param resourceObject resources handed to the preprocessor, the phrase detector and,
     *                       for {@code ML_BASED_RANKER}, the ranker
     * @param rankerType     which {@link Ranker} implementation to instantiate
     * @throws NLPModuleException       propagated from constructing the pipeline components
     * @throws IllegalArgumentException if {@code rankerType} is not a supported type
     */
    public PipelineExtractor(ResourceObject resourceObject, RankerType rankerType) throws NLPModuleException {
        this(new Preprocessor(resourceObject), new PhraseDetector(resourceObject), getRanker(rankerType, resourceObject));
    }

    /**
     * Converts every scored phrase into a {@link HashTag} via
     * {@link #scoredPhrase2hashtag(ScoredPhrase)}, preserving order.
     *
     * @param list scored phrases to convert
     * @return a new list with one hash tag per scored phrase
     */
    public static List<HashTag> convert(List<ScoredPhrase> list) {
        // The decompiler emitted a reference to a synthetic lambda class here; the
        // method reference below is the source-level equivalent.
        return list.stream()
                .map(PipelineExtractor::scoredPhrase2hashtag)
                .collect(Collectors.toList());
    }

    /**
     * Instantiates the ranker for the given type using a {@link Language}.
     *
     * @throws IllegalArgumentException if {@code rankerType} is not handled below
     */
    private static Ranker getRanker(RankerType rankerType, Language language) throws NLPModuleException {
        switch (rankerType) {
            case PIPELINE_RANKER:
                return new PipelineRanker();
            case ML_BASED_RANKER:
                return new MLBasedRanker(language);
            default:
                throw new IllegalArgumentException(String.format("Unknown extractor type %s.", rankerType));
        }
    }

    /**
     * Instantiates the ranker for the given type using a {@link ResourceObject}.
     *
     * @throws IllegalArgumentException if {@code rankerType} is not handled below
     */
    private static Ranker getRanker(RankerType rankerType, ResourceObject resourceObject) throws NLPModuleException {
        switch (rankerType) {
            case PIPELINE_RANKER:
                return new PipelineRanker();
            case ML_BASED_RANKER:
                return new MLBasedRanker(resourceObject);
            default:
                throw new IllegalArgumentException(String.format("Unknown extractor type %s.", rankerType));
        }
    }

    /**
     * Removes, in place, every hash tag whose raw text contains the same
     * space-separated word more than once.
     *
     * @param list hash tags to filter; this list is modified and must support removal
     * @return the same {@code list} instance after filtering
     */
    public static List<HashTag> postprocess(List<HashTag> list) {
        List<HashTag> withRepeatedWords = new ArrayList<>();
        for (HashTag hashTag : list) {
            List<String> words = Arrays.asList(hashTag.getRawText().split(StringUtils.SPACE));
            // A mismatch between total and distinct word counts means a word repeats.
            if (words.size() != words.stream().distinct().count()) {
                withRepeatedWords.add(hashTag);
            }
        }
        list.removeAll(withRepeatedWords);
        return list;
    }

    /**
     * Builds a {@link HashTag} from the lower-cased raw text of the phrase and its score.
     * NOTE(review): {@code toLowerCase()} uses the default locale - confirm that is intended.
     *
     * @param scoredPhrase phrase and score to convert
     * @return the corresponding hash tag
     */
    public static HashTag scoredPhrase2hashtag(ScoredPhrase scoredPhrase) {
        return new HashTag(scoredPhrase.getPhrase().getRawText().toLowerCase(), scoredPhrase.getScore());
    }

    /**
     * Runs the full pipeline on the given text: preprocess, add phrases, rank,
     * convert to hash tags and drop tags with repeated words.
     *
     * @param str input text
     * @return the extracted hash tags
     */
    @Override // sec.bdc.tm.hte.common.intf.HashTagExtractor
    public List<HashTag> extract(String str) {
        List<PreprocessedSentence> preprocess = this.preprocessor.preprocess(str);
        // addPhrases returns nothing that is used here; the same sentence list is then ranked.
        this.phraseDetector.addPhrases(preprocess);
        return postprocess(convert(this.ranker.rank(preprocess, str)));
    }
}
