package com.xiaomi.minlptokenizer;

import com.xiaomi.minlp.intervener.re.RegexIntervener;
import com.xiaomi.minlp.intervener.re.RegexRule;
import com.xiaomi.minlptokenizer.common.Lexicon;
import com.xiaomi.minlptokenizer.common.Onehot;
import com.xiaomi.minlptokenizer.common.Viterbi;
import defpackage.C$r8$backportedMethods$utility$String$2$joinIterable;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/* loaded from: classes2.dex */
/**
 * Base class for word segmenters driven by a per-character tagging model.
 *
 * <p>Subclasses supply the model via {@link #infer(int[][], int, int)}. This class
 * turns the input text into one-hot character ids, optionally lets a user
 * {@link Lexicon} adjust the model scores, decodes the best tag path with
 * {@link Viterbi}, and finally lets an optional {@link RegexIntervener} post-process
 * the space-joined result.
 *
 * <p>NOTE(review): no synchronization is performed here; the lexicon and intervener
 * are created lazily and mutated in place, so concurrent use of the {@code add*}
 * methods together with {@link #cut(String)} presumably needs external locking.
 */
public abstract class Tokenizer {
    /**
     * Tag-count argument handed to {@link Viterbi#getBestPath}. Left public and
     * non-final because external code may assign it.
     */
    public static int TAG_LENGTH = 5;

    /** Runs of Unicode separators and/or ASCII whitespace; collapsed to one space before inference. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("[\\p{Z}\\s]+");

    /** Separator used to split the space-joined (and possibly intervened) result back into tokens. */
    private static final Pattern TOKEN_SEPARATOR = Pattern.compile("\\s+");

    private RegexIntervener intervener;
    private Lexicon lexicon;
    private final Onehot onehot;

    /**
     * Creates a tokenizer with no lexicon and no regex intervener.
     *
     * @param str argument forwarded unchanged to the {@link Onehot} constructor
     */
    public Tokenizer(String str) {
        this.onehot = new Onehot(str);
    }

    /**
     * Creates a tokenizer with a user lexicon.
     *
     * @param str    argument forwarded unchanged to the {@link Onehot} constructor
     * @param strArr argument forwarded unchanged to the {@link Lexicon} constructor
     */
    public Tokenizer(String str, String[] strArr) {
        this(str);
        this.lexicon = new Lexicon(strArr);
    }

    /**
     * Creates a tokenizer with a user lexicon and a regex intervener.
     *
     * @param str    argument forwarded unchanged to the {@link Onehot} constructor
     * @param strArr argument forwarded unchanged to the {@link Lexicon} constructor
     * @param str2   argument forwarded unchanged to the {@link RegexIntervener} constructor
     */
    public Tokenizer(String str, String[] strArr, String str2) {
        this(str, strArr);
        this.intervener = new RegexIntervener(str2);
    }

    /**
     * Adds regex rules read from a stream, creating the intervener on first use.
     *
     * @param inputStream source passed to {@link RegexIntervener#addRules}
     */
    public void addRegexRules(InputStream inputStream) {
        ensureIntervener().addRules(inputStream);
    }

    /**
     * Adds the given regex rules, creating the intervener on first use.
     *
     * @param collection rules passed to {@link RegexIntervener#addRules}
     */
    public void addRegexRules(Collection<RegexRule> collection) {
        ensureIntervener().addRules(collection);
    }

    /**
     * Adds words read from a stream to the lexicon (created on first use) and rebuilds it.
     *
     * @param inputStream source passed to {@link Lexicon#addWords}
     */
    public void addWords(InputStream inputStream) {
        Lexicon target = ensureLexicon();
        target.addWords(inputStream);
        target.build();
    }

    /**
     * Adds the given words to the lexicon (created on first use) and rebuilds it.
     *
     * @param collection words passed to {@link Lexicon#addWords}
     */
    public void addWords(Collection<String> collection) {
        Lexicon target = ensureLexicon();
        target.addWords(collection);
        target.build();
    }

    /**
     * Segments {@code str} into tokens.
     *
     * <p>Whitespace/separator runs are first collapsed to a single space. The result
     * never contains empty strings: a leading empty element that {@code split} would
     * produce for text starting with whitespace is dropped.
     *
     * @param str text to segment; {@code null} or blank input yields an empty list
     * @return the tokens in order; a mutable empty list when there is nothing to
     *         segment, otherwise a fixed-size list
     */
    public List<String> cut(String str) {
        List<String> words = new ArrayList<>();
        if (str == null) {
            return words;
        }
        String text = WHITESPACE_RUN.matcher(str).replaceAll(StringUtils.SPACE);
        if (text.trim().isEmpty()) {
            return words;
        }

        int length = text.length();
        float[][][] scores = infer(this.onehot.getCharOnehot(text), 1, length);
        if (this.lexicon != null) {
            // Called for its effect on the scores array; the return value (if any) is unused.
            this.lexicon.parse(text, scores);
        }
        int[] bestPath = Viterbi.getBestPath(scores[0], length, TAG_LENGTH);

        StringBuilder current = new StringBuilder();
        for (int i = 0; i < length; i++) {
            current.append(text.charAt(i));
            // Tags 1 and 4 close the current word.
            // NOTE(review): presumably "end" and "single" in the model's tag scheme — confirm.
            if (bestPath[i] == 1 || bestPath[i] == 4) {
                words.add(current.toString());
                current.setLength(0);
            }
        }
        if (current.length() > 0) {
            // Flush characters after the last closing tag.
            words.add(current.toString());
        }

        String joined = String.join(StringUtils.SPACE, words);
        if (this.intervener != null) {
            joined = this.intervener.intervene(joined);
        }
        return splitTokens(joined);
    }

    /** Releases whatever the concrete implementation holds; contract is defined by subclasses. */
    public abstract void destroy();

    /**
     * Runs the tagging model.
     *
     * <p>{@link #cut(String)} calls this with the one-hot ids of the normalised text,
     * {@code i == 1} and {@code i2 == text.length()}, and reads element {@code [0]} of
     * the result. NOTE(review): {@code i} looks like a batch size and {@code i2} a
     * sequence length — confirm against implementations.
     *
     * @param iArr one-hot character ids as produced by {@link Onehot#getCharOnehot}
     * @param i    first size argument (1 when called from {@code cut})
     * @param i2   second size argument (text length when called from {@code cut})
     * @return model scores; index {@code [0]} is passed to {@link Viterbi#getBestPath}
     */
    protected abstract float[][][] infer(int[][] iArr, int i, int i2);

    /** Returns the intervener, creating an empty one if none exists yet. */
    private RegexIntervener ensureIntervener() {
        if (this.intervener == null) {
            this.intervener = new RegexIntervener();
        }
        return this.intervener;
    }

    /** Returns the lexicon, creating an empty one if none exists yet. */
    private Lexicon ensureLexicon() {
        if (this.lexicon == null) {
            this.lexicon = new Lexicon();
        }
        return this.lexicon;
    }

    /** Splits on whitespace runs and drops the leading empty element split() emits for leading whitespace. */
    private static List<String> splitTokens(String joined) {
        String[] parts = TOKEN_SEPARATOR.split(joined);
        boolean leadingEmpty = parts.length > 0 && parts[0].isEmpty();
        if (!leadingEmpty) {
            return Arrays.asList(parts);
        }
        if (parts.length == 1) {
            // Input was empty: split() returned [""]; report no tokens instead.
            return new ArrayList<>();
        }
        return Arrays.asList(Arrays.copyOfRange(parts, 1, parts.length));
    }
}
