package com.hankcs.hanlp.model.crf;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.model.crf.crfpp.FeatureIndex;
import com.hankcs.hanlp.model.perceptron.PerceptronSegmenter;
import com.hankcs.hanlp.model.perceptron.feature.FeatureMap;
import com.hankcs.hanlp.model.perceptron.instance.CWSInstance;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/* loaded from: classes.dex */
/**
 * Word segmenter backed by a CRF model.
 *
 * <p>Decoding is delegated to a {@link PerceptronSegmenter} built on top of the loaded
 * CRF model; this class supplies the feature extraction driven by the model's feature
 * templates and the corpus conversion used by {@link CRFTagger}.
 */
public class CRFSegmenter extends CRFTagger implements Segmenter {
    /** Decoder built on the loaded model; stays {@code null} when no model path was given. */
    private PerceptronSegmenter perceptronSegmenter;

    /**
     * Creates a segmenter using the path configured in {@code HanLP.Config.CRFCWSModelPath}.
     *
     * @throws IOException propagated from {@link #CRFSegmenter(String)}
     */
    public CRFSegmenter() throws IOException {
        this(HanLP.Config.CRFCWSModelPath);
    }

    /**
     * Creates a segmenter from the given model path.
     *
     * @param modelPath path handed to the {@link CRFTagger} constructor; when {@code null}
     *                  no decoder is created and the {@code segment} methods will fail
     *                  with a {@link NullPointerException}
     * @throws IOException propagated from the superclass constructor
     */
    public CRFSegmenter(String modelPath) throws IOException {
        super(modelPath);
        if (modelPath != null) {
            this.perceptronSegmenter = new PerceptronSegmenter(this.model);
        }
    }

    /**
     * Wraps a sentence in a {@link CWSInstance} whose features are produced from the
     * model's feature templates rather than the instance's built-in extraction.
     *
     * @param text the sentence to build an instance for
     * @return an instance bound to this model's feature map
     */
    private CWSInstance createInstance(String text) {
        final FeatureTemplate[] featureTemplateArray = this.model.getFeatureTemplateArray();
        return new CWSInstance(text, this.model.featureMap) {
            /**
             * Builds one feature string per template for the character at {@code position}
             * and collects the features through {@code addFeatureThenClear}.
             */
            @Override
            protected int[] extractFeature(String sentence, FeatureMap featureMap, int position) {
                StringBuilder featureText = new StringBuilder();
                List<Integer> featureIds = new LinkedList<Integer>();
                for (int templateIndex = 0; templateIndex < featureTemplateArray.length; templateIndex++) {
                    FeatureTemplate template = featureTemplateArray[templateIndex];
                    Iterator<int[]> offsets = template.offsetList.iterator();
                    Iterator<String> delimiters = template.delimiterList.iterator();
                    // The first delimiter entry is consumed and never appended to the feature text.
                    delimiters.next();
                    while (offsets.hasNext()) {
                        int index = offsets.next()[0] + position;
                        if (index < 0) {
                            // Before the sentence start: index -1 maps to BOS[0], -2 to BOS[1], ...
                            featureText.append(FeatureIndex.BOS[-(index + 1)]);
                        } else if (index >= sentence.length()) {
                            // Past the sentence end: length maps to EOS[0], length + 1 to EOS[1], ...
                            featureText.append(FeatureIndex.EOS[index - sentence.length()]);
                        } else {
                            featureText.append(sentence.charAt(index));
                        }
                        if (delimiters.hasNext()) {
                            featureText.append(delimiters.next());
                        } else {
                            // No delimiter left: append the template's index instead.
                            featureText.append(templateIndex);
                        }
                    }
                    addFeatureThenClear(featureText, featureIds, featureMap);
                }
                return toFeatureArray(featureIds);
            }
        };
    }

    /**
     * Writes a sentence as one {@code character<TAB>tag} line per character.
     *
     * <p>Each word value is passed through {@link CharTable#convert(String)} first.
     * A one-character word is tagged {@code S}; longer words are tagged {@code B} on the
     * first character, {@code M} on inner characters and {@code E} on the last one.
     * Words that are empty after conversion are skipped, because they have no character
     * to tag (previously they caused a {@link StringIndexOutOfBoundsException}).
     *
     * @param sentence       the sentence whose simple word list is converted
     * @param bufferedWriter destination of the tagged lines
     * @throws IOException if writing fails
     */
    @Override
    protected void convertCorpus(Sentence sentence, BufferedWriter bufferedWriter) throws IOException {
        for (Word word : sentence.toSimpleWordList()) {
            String converted = CharTable.convert(word.value);
            int length = converted.length();
            if (length == 0) {
                continue;
            }
            if (length == 1) {
                writeTaggedChar(bufferedWriter, converted.charAt(0), 'S');
                continue;
            }
            int last = length - 1;
            writeTaggedChar(bufferedWriter, converted.charAt(0), 'B');
            for (int i = 1; i < last; i++) {
                writeTaggedChar(bufferedWriter, converted.charAt(i), 'M');
            }
            writeTaggedChar(bufferedWriter, converted.charAt(last), 'E');
        }
    }

    /** Writes a single {@code character<TAB>tag<NEWLINE>} line. */
    private static void writeTaggedChar(BufferedWriter writer, char character, char tag) throws IOException {
        writer.write(character);
        writer.write('\t');
        writer.write(tag);
        writer.write('\n');
    }

    /**
     * Returns the built-in feature template: seven unigram templates (U0 to U6) over the
     * characters at relative offsets -2 to 2, followed by the bigram marker {@code B}.
     *
     * @return the template text
     */
    @Override
    protected String getDefaultFeatureTemplate() {
        return "# Unigram\n"
                + "U0:%x[-1,0]\n"
                + "U1:%x[0,0]\n"
                + "U2:%x[1,0]\n"
                + "U3:%x[-2,0]%x[-1,0]\n"
                + "U4:%x[-1,0]%x[0,0]\n"
                + "U5:%x[0,0]%x[1,0]\n"
                + "U6:%x[1,0]%x[2,0]\n"
                + "\n"
                + "# Bigram\n"
                + "B";
    }

    /**
     * Segments a text into words.
     *
     * @param text the text to segment
     * @return a new list holding the segmentation output
     * @throws NullPointerException if this instance was created with a {@code null} model path
     */
    @Override
    public List<String> segment(String text) {
        List<String> wordList = new LinkedList<String>();
        segment(text, CharTable.convert(text), wordList);
        return wordList;
    }

    /**
     * Segments a text, passing the output list to the underlying decoder.
     *
     * @param text       the original text, passed to the decoder unchanged
     * @param normalized the text features are extracted from; {@link #segment(String)}
     *                   supplies {@code CharTable.convert(text)} here
     * @param wordList   list handed to the decoder to receive the result
     * @throws NullPointerException if this instance was created with a {@code null} model path
     */
    @Override
    public void segment(String text, String normalized, List<String> wordList) {
        this.perceptronSegmenter.segment(text, createInstance(normalized), wordList);
    }
}
