package com.backend.nlp;

import com.backend.query_analysis.POSTag;
import com.mobvoi.app.platform.common.util.StringUtil;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/* loaded from: classes.dex */
/**
 * Greedy forward longest-match word segmenter driven by a vocabulary set.
 *
 * <p>Input text has its whitespace stripped, then the longest prefix found in
 * the vocabulary is repeatedly cut off the front of the remaining text. The
 * resulting tokens are joined with single spaces.
 */
public class ChineseSeg {
    /** Vocabulary consulted when looking for the longest matching prefix. */
    private final Set<String> vocabSet;

    /**
     * Creates a segmenter whose vocabulary is produced by
     * {@code DictUtil.setupDict(str)}.
     *
     * @param str argument forwarded to {@code DictUtil.setupDict}
     *            (presumably a dictionary path or resource name -- confirm
     *            against DictUtil)
     */
    public ChineseSeg(String str) {
        this.vocabSet = DictUtil.setupDict(str);
    }

    /**
     * Creates a segmenter backed by the given vocabulary. The set is stored
     * by reference, not copied.
     *
     * @param set vocabulary of known words
     */
    public ChineseSeg(Set<String> set) {
        this.vocabSet = set;
    }

    /**
     * Returns the longest prefix of {@code str} contained in the vocabulary.
     *
     * <p>If no prefix is a known word, a warning is printed to standard
     * output and the whole input is returned as a single token.
     *
     * @param str non-empty text to match against the vocabulary
     * @return the longest known prefix, or {@code str} itself if none matches
     */
    private String longestMatch(String str) {
        // Try candidates from the full string down to a single character.
        for (int end = str.length(); end > 0; end--) {
            String candidate = str.substring(0, end);
            if (this.vocabSet.contains(candidate)) {
                return candidate;
            }
        }
        System.out.println("warning!!!!!!!!!!!!, in=" + str);
        return str;
    }

    /**
     * Small demo that segments two sample strings and prints the results.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException declared for interface compatibility; nothing in
     *                     the visible body throws it
     */
    public static void main(String[] strArr) throws IOException {
        Set<String> vocab = new HashSet<>();
        vocab.add("a");
        vocab.add("ab");
        vocab.add("b");
        vocab.add("c");
        vocab.add(POSTag.ADV);
        vocab.add("cd");
        vocab.add("bcd");
        vocab.add("让子弹");
        vocab.add("弹");
        vocab.add("飞");
        ChineseSeg chineseSeg = new ChineseSeg(vocab);
        System.out.println(chineseSeg.segmentation("a b c d"));
        System.out.println(chineseSeg.segmentation("让 子弹 飞"));
    }

    /**
     * Segments {@code str} into vocabulary words using greedy longest-prefix
     * matching.
     *
     * @param str text to segment; runs of whitespace are replaced with
     *            {@code StringUtil.EMPTY_STRING} before matching
     * @return the tokens joined by single spaces, trimmed; empty when the
     *         input contains nothing but whitespace
     */
    public String segmentation(String str) {
        StringBuilder sb = new StringBuilder();
        String remaining = str.replaceAll("\\s+", StringUtil.EMPTY_STRING);
        while (remaining.length() > 0) {
            String token = longestMatch(remaining);
            // The token is always a non-empty prefix of the remaining text, so
            // drop it by length. Removing it with replaceFirst() would treat the
            // token as a regular expression and misbehave (exception, wrong cut,
            // or endless loop) on text such as "(", "a+" or "()".
            remaining = remaining.substring(token.length());
            sb.append(token).append(' ');
        }
        return sb.toString().trim();
    }
}
