package sec.bdc.nlp.tokenizer;

import java.util.ArrayList;
import java.util.List;
import sec.bdc.ml.classification.common.SeqClassificationResult;
import sec.bdc.ml.classification.intf.SequenceClassifier;
import sec.bdc.ml.classification.lccrf.CRFTagger;
import sec.bdc.ml.common.ds.featurevector.StringFeaturesVector;
import sec.bdc.nlp.ds.Token;
import sec.bdc.nlp.ds.Word;
import sec.bdc.nlp.tokenizer.intf.Tokenizer;
import sec.bdc.nlp.tokenizer.resource.crf.KoTokenizerResourceCRF;

/* loaded from: classes49.dex */
/**
 * {@link Tokenizer} implementation that segments a sentence into tokens by running a
 * character-level sequence classifier over it.
 *
 * <p>Each non-space character of the input becomes one feature vector holding the character
 * itself and a word-position marker ({@code "B"} for the first character after a space or at the
 * start of the input, {@code "I"} otherwise). The classifier's best label for each character
 * decides where tokens start: a label beginning with {@code "B"} opens a new token, and the text
 * after the first two characters of that label is used as the token's tag.
 *
 * <p>NOTE(review): the class name suggests this targets Korean text and the labels presumably
 * look like {@code "B-TAG"} / {@code "I-TAG"}; neither is verifiable from this file alone.
 */
public class CRFTokenizerKo implements Tokenizer {
    /** Word-position marker for the first character of a space-delimited word. */
    private static final String WORD_BEGIN = "B";

    /** Word-position marker for every following character of a space-delimited word. */
    private static final String WORD_INSIDE = "I";

    /** Label prefix with which the classifier marks the first character of a token. */
    private static final String TOKEN_BEGIN_PREFIX = "B";

    /** Tag used when a label is too short (two characters or fewer) to carry its own tag. */
    private static final String DEFAULT_TAG = "DEFAULT";

    /** Only the plain space character separates words; other whitespace is kept as content. */
    private static final char WORD_SEPARATOR = ' ';

    private final SequenceClassifier<StringFeaturesVector> crfTagger;

    /**
     * Creates a tokenizer backed by a {@link CRFTagger} built from the resource's model.
     *
     * @param koTokenizerResourceCRF resource supplying the model; must not be {@code null}
     */
    public CRFTokenizerKo(KoTokenizerResourceCRF koTokenizerResourceCRF) {
        this.crfTagger = new CRFTagger(koTokenizerResourceCRF.getModel());
    }

    /**
     * Builds one feature vector per non-space character of {@code text}.
     *
     * @param text    input to convert
     * @param offsets output list; receives, for each produced feature vector, the index of the
     *                corresponding character in {@code text}
     * @return the feature vectors in input order; a zero-length array when {@code text} is empty
     *         or contains only spaces
     */
    private StringFeaturesVector[] makeFeatureVector(String text, List<Integer> offsets) {
        List<StringFeaturesVector> vectors = new ArrayList<>();
        boolean atWordStart = true;
        for (int pos = 0; pos < text.length(); pos++) {
            char ch = text.charAt(pos);
            if (ch == WORD_SEPARATOR) {
                // Spaces produce no feature; they only mark the next character as a word start.
                atWordStart = true;
                continue;
            }
            String marker = atWordStart ? WORD_BEGIN : WORD_INSIDE;
            vectors.add(new StringFeaturesVector(new String[]{String.valueOf(ch), marker}));
            offsets.add(Integer.valueOf(pos));
            atWordStart = false;
        }
        // A zero-length seed array is essential: toArray() with a length-1 array would hand back
        // that array with a null element when there are no vectors at all.
        return vectors.toArray(new StringFeaturesVector[0]);
    }

    /**
     * Extracts the tag part of a classifier label.
     *
     * @param label classifier label
     * @return everything after the first two characters, or {@code "DEFAULT"} when the label has
     *         two characters or fewer
     */
    private static String tagOf(String label) {
        return label.length() > 2 ? label.substring(2) : DEFAULT_TAG;
    }

    /**
     * Groups the characters in {@code [from, to)} into tokens according to the classifier labels.
     *
     * <p>The character at {@code from} always opens a token regardless of its label; afterwards a
     * new token starts at every character whose best label begins with {@code "B"}.
     *
     * @param features feature vectors, one per character; feature 0 holds the character text
     * @param result   classification result providing the best label per index
     * @param offsets  index of each character in the original input
     * @param from     first index to process (inclusive)
     * @param to       last index to process (exclusive)
     * @return tokens in order; empty when the range is empty
     */
    private List<Token> makeTokenList(StringFeaturesVector[] features, SeqClassificationResult result, List<Integer> offsets, int from, int to) {
        List<Token> tokens = new ArrayList<>();
        if (from >= to) {
            return tokens;
        }
        StringBuilder text = new StringBuilder();
        // Track the start index explicitly instead of deriving it from the builder length, which
        // is only correct while every feature value is exactly one character long.
        int tokenStart = from;
        String tag = tagOf(result.getBestLabel(from));
        for (int idx = from; idx < to; idx++) {
            if (idx > from && result.getBestLabel(idx).startsWith(TOKEN_BEGIN_PREFIX)) {
                tokens.add(new Token(offsets.get(tokenStart).intValue(), text.toString(), tag));
                text.setLength(0);
                tokenStart = idx;
                tag = tagOf(result.getBestLabel(idx));
            }
            text.append(features[idx].getFeatures()[0].getValue());
        }
        // The range is non-empty here, so there is always one unfinished token to flush.
        tokens.add(new Token(offsets.get(tokenStart).intValue(), text.toString(), tag));
        return tokens;
    }

    /**
     * Splits the input into space-delimited words and attaches to each the tokens found in it.
     *
     * @param str      original input text
     * @param features feature vectors, one per non-space character; feature 1 holds the
     *                 word-position marker
     * @param result   classification result for {@code features}
     * @param offsets  index of each character in {@code str}
     * @return words in order; empty when there are no feature vectors
     */
    private List<Word> makeWordList(String str, StringFeaturesVector[] features, SeqClassificationResult result, List<Integer> offsets) {
        List<Word> words = new ArrayList<>();
        if (features.length == 0) {
            return words;
        }
        int wordStart = 0;
        // Index 0 always belongs to the first word, so boundaries are searched from index 1 on.
        for (int idx = 1; idx < features.length; idx++) {
            if (features[idx].getFeatures()[1].getValue().startsWith(WORD_BEGIN)) {
                // NOTE(review): this substring runs up to the next word's first character, so the
                // text of every non-final word includes its trailing separator space(s), unlike
                // the final word below. Kept as-is because callers may rely on it — confirm.
                Word word = new Word(str.substring(offsets.get(wordStart).intValue(), offsets.get(idx).intValue()));
                word.setTokenList(makeTokenList(features, result, offsets, wordStart, idx));
                words.add(word);
                wordStart = idx;
            }
        }
        int lastCharOffset = offsets.get(features.length - 1).intValue();
        Word lastWord = new Word(str.substring(offsets.get(wordStart).intValue(), lastCharOffset + 1));
        lastWord.setTokenList(makeTokenList(features, result, offsets, wordStart, features.length));
        words.add(lastWord);
        return words;
    }

    /**
     * Not implemented by this tokenizer.
     *
     * @param sentence ignored
     * @return always {@code null}
     */
    @Override // sec.bdc.nlp.tokenizer.intf.Tokenizer
    public List<Token> tokenizeSentence(String sentence) {
        return null;
    }

    /**
     * Tokenizes the sentence, ignoring space characters, and returns a flat token list.
     *
     * @param sentence text to tokenize; must not be {@code null}
     * @return tokens in order of appearance; an empty list when the sentence is empty or contains
     *         only spaces
     * @throws NullPointerException if {@code sentence} is {@code null}
     */
    @Override // sec.bdc.nlp.tokenizer.intf.Tokenizer
    public List<Token> tokenizeSentenceWithoutSpace(String sentence) {
        List<Integer> offsets = new ArrayList<>();
        StringFeaturesVector[] features = makeFeatureVector(sentence, offsets);
        if (features.length == 0) {
            // Nothing to classify; avoid running the classifier on an empty sequence.
            return new ArrayList<>();
        }
        return makeTokenList(features, this.crfTagger.classify(features), offsets, 0, features.length);
    }

    /**
     * Tokenizes the sentence and returns the tokens grouped by space-delimited word.
     *
     * @param sentence text to tokenize; must not be {@code null}
     * @return words in order of appearance, each carrying its token list; an empty list when the
     *         sentence is empty or contains only spaces
     * @throws NullPointerException if {@code sentence} is {@code null}
     */
    @Override // sec.bdc.nlp.tokenizer.intf.Tokenizer
    public List<Word> tokenizeSentenceWithoutSpaceToWordList(String sentence) {
        List<Integer> offsets = new ArrayList<>();
        StringFeaturesVector[] features = makeFeatureVector(sentence, offsets);
        if (features.length == 0) {
            // Nothing to classify; avoid running the classifier on an empty sequence.
            return new ArrayList<>();
        }
        return makeWordList(sentence, features, this.crfTagger.classify(features), offsets);
    }

    /**
     * Not implemented by this tokenizer.
     *
     * @param word ignored
     * @return always {@code null}
     */
    @Override // sec.bdc.nlp.tokenizer.intf.Tokenizer
    public List<Token> tokenizeWord(String word) {
        return null;
    }
}
