package com.hankcs.hanlp.seg;

import cn.mc.module.event.ui.important.ImportantEventCustomActivity;
import com.hankcs.hanlp.algorithm.Viterbi;
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionaryTransformMatrixDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.seg.NShort.Path.AtomNode;
import com.hankcs.hanlp.seg.common.Graph;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.seg.common.Vertex;
import com.hankcs.hanlp.seg.common.WordNet;
import com.hankcs.hanlp.utility.TextUtility;
import com.xiaomi.mipush.sdk.Constants;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;

/* loaded from: classes3.dex */
public abstract class WordBasedSegment extends Segment {
    /**
     * Cuts {@code str.substring(i, i2)} into atom nodes according to the per-character
     * type code returned by {@link CharType#get}.
     *
     * <p>Characters of type 7, 10, 6 or 17 always become single-character atoms. Consecutive
     * characters sharing type 5 or type 9 are glued into one atom. Everything else becomes a
     * single-character atom.
     *
     * <p>NOTE(review): the numeric codes are literals inlined by the decompiler; they presumably
     * correspond to named {@code CharType} constants — confirm before replacing them.
     *
     * @param str the text to cut
     * @param i   start index (inclusive) into {@code str}
     * @param i2  end index (exclusive) into {@code str}
     * @return the atoms in text order, covering every character of the requested range
     * @throws IllegalArgumentException if {@code i2 < i}
     */
    private static List<AtomNode> atomSegment(String str, int i, int i2) {
        if (i2 < i) {
            throw new IllegalArgumentException("start=" + i + " > end=" + i2);
        }
        List<AtomNode> atoms = new ArrayList<AtomNode>();
        char[] chars = str.substring(i, i2).toCharArray();

        // Pass 1: compute the (adjusted) type code of every character.
        int[] types = new int[chars.length];
        for (int k = 0; k < chars.length; k++) {
            char c = chars[k];
            types[k] = CharType.get(c);
            boolean dotWithFollower = c == '.' && k < chars.length - 1;
            if (dotWithFollower && CharType.get(chars[k + 1]) == 9) {
                // A '.' directly before a type-9 character joins that character's run.
                types[k] = 9;
            } else if (dotWithFollower && chars[k + 1] >= '0' && chars[k + 1] <= '9') {
                types[k] = 5;
            } else if (types[k] == 8) {
                // Type 8 is folded into type 5 so both end up in the same kind of run.
                types[k] = 5;
            }
        }

        // Pass 2: emit atoms. `pos` always points at the first character not yet emitted.
        StringBuilder run = new StringBuilder();
        int pos = 0;
        while (pos < chars.length) {
            int type = types[pos];
            if (type == 7 || type == 10 || type == 6 || type == 17) {
                atoms.add(new AtomNode(String.valueOf(chars[pos]), type));
                pos++;
            } else if (pos < chars.length - 1 && (type == 5 || type == 9)) {
                run.setLength(0);
                run.append(chars[pos]);
                pos++;
                while (pos < chars.length && types[pos] == type) {
                    run.append(chars[pos]);
                    pos++;
                }
                atoms.add(new AtomNode(run.toString(), type));
                // Do not advance here: `pos` already sits on the first character of a
                // different type (or past the end), and that character still needs its own atom.
            } else {
                atoms.add(new AtomNode(chars[pos], type));
                pos++;
            }
        }
        return atoms;
    }

    /**
     * Confirms {@link Nature#w} on every vertex whose word is one of the recognised
     * delimiter strings.
     *
     * <p>NOTE(review): the third delimiter is read from
     * {@code Constants.ACCEPT_TIME_SEPARATOR_SERVER}, a push-SDK constant the decompiler
     * substituted for a literal; presumably it is a plain hyphen — confirm.
     *
     * @param list vertices to inspect; matching vertices are modified in place
     */
    static void changeDelimiterPOS(List<Vertex> list) {
        for (Vertex candidate : list) {
            final String word = candidate.realWord;
            final boolean isDelimiter = word.equals("－－")
                    || word.equals("—")
                    || word.equals(Constants.ACCEPT_TIME_SEPARATOR_SERVER);
            if (!isDelimiter) {
                continue;
            }
            candidate.confirmNature(Nature.w);
        }
    }

    /**
     * Walks the list pairwise (current vertex, following vertex) and, whenever the current word
     * is all-numeric according to {@link TextUtility}, applies one of these rules:
     * <ol>
     *   <li>following word is one of 月/日/时/分/秒 or 月份: merge both into a time vertex;</li>
     *   <li>following word equals {@code ImportantEventCustomActivity.YEAR}: merge when the
     *       current word passes {@code TextUtility.isYearTime}, otherwise confirm
     *       {@link Nature#m};</li>
     *   <li>current word ends with 点: confirm {@link Nature#t};</li>
     *   <li>current word does not end with one of {@code ∶·．／./}: confirm {@link Nature#m};</li>
     *   <li>otherwise, if longer than one character: split the trailing character off into its
     *       own punctuation vertex.</li>
     * </ol>
     *
     * <p>NOTE(review): {@code ImportantEventCustomActivity.YEAR} is an app constant the decompiler
     * substituted for a literal; presumably it is the "year" character — confirm.
     *
     * @param list vertices to rewrite in place; lists shorter than two elements are left alone
     */
    private static void checkDateElements(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        final ListIterator<Vertex> cursor = list.listIterator();
        Vertex current = cursor.next();
        while (cursor.hasNext()) {
            final Vertex following = cursor.next();
            final String currentWord = current.realWord;
            final boolean currentIsNumeric =
                    TextUtility.isAllNum(currentWord) || TextUtility.isAllChineseNum(currentWord);
            if (!currentIsNumeric) {
                current = following;
                continue;
            }

            final String followingWord = following.realWord;
            final boolean followedByTimeUnit =
                    (followingWord.length() == 1 && "月日时分秒".contains(followingWord))
                            || (followingWord.length() == 2 && followingWord.equals("月份"));

            if (followedByTimeUnit) {
                mergeDate(cursor, following, current);
            } else if (followingWord.equals(ImportantEventCustomActivity.YEAR)) {
                if (TextUtility.isYearTime(currentWord)) {
                    mergeDate(cursor, following, current);
                } else {
                    current.confirmNature(Nature.m);
                }
            } else if (currentWord.endsWith("点")) {
                current.confirmNature(Nature.t, true);
            } else {
                final char[] currentChars = currentWord.toCharArray();
                final String tail = String.valueOf(currentChars[currentChars.length - 1]);
                if (!"∶·．／./".contains(tail)) {
                    current.confirmNature(Nature.m, true);
                } else if (currentWord.length() > 1) {
                    final Vertex trimmed =
                            Vertex.newNumberInstance(currentWord.substring(0, currentWord.length() - 1));
                    // Step back over `following` and `current`, replace `current` with the
                    // trimmed number, then insert the split-off character right after it.
                    cursor.previous();
                    cursor.previous();
                    cursor.set(trimmed);
                    cursor.next();
                    cursor.add(Vertex.newPunctuationInstance(tail));
                }
            }
            current = following;
        }
    }

    /**
     * Converts a vertex list into terms by delegating to the two-argument {@code convert}
     * overload with {@code false} as its second argument.
     *
     * <p>NOTE(review): the two-argument overload is not declared in this file; presumably it is
     * inherited from {@code Segment} — see that overload for the meaning of the flag.
     *
     * @param list the vertices to convert
     * @return whatever {@code convert(list, false)} returns
     */
    protected static List<Term> convert(List<Vertex> list) {
        return convert(list, false);
    }

    /**
     * Applies the rule-based post-processing passes to a vertex list, in place and in a fixed
     * order: merge consecutive numbers, re-tag delimiters, split number-separator-number words,
     * then recognise date/time elements.
     *
     * @param list the vertices to rewrite in place
     */
    protected static void fixResultByRule(List<Vertex> list) {
        // Numbers are merged first so the later passes see each number as a single vertex.
        mergeContinueNumIntoOne(list);
        changeDelimiterPOS(list);
        splitMiddleSlashFromDigitalWords(list);
        checkDateElements(list);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds a graph from a word net by delegating to {@code WordNet.toGraph()}.
     *
     * @param wordNet the word net to convert
     * @return the graph produced by {@code wordNet.toGraph()}
     */
    public static Graph generateBiGraph(WordNet wordNet) {
        return wordNet.toGraph();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Post-processes a vertex list with {@link #fixResultByRule(List)} and then adds the
     * resulting vertices to the given word net.
     *
     * @param list    the vertices to fix up; modified in place
     * @param wordNet the word net that receives the fixed vertices via {@code addAll}
     */
    public static void generateWord(List<Vertex> list, WordNet wordNet) {
        fixResultByRule(list);
        wordNet.addAll(list);
    }

    /**
     * Collapses runs of adjacent numeric vertices into a single number vertex.
     *
     * <p>A vertex counts as numeric when {@code TextUtility.isAllNum} or
     * {@code TextUtility.isAllChineseNum} accepts its word. After a merge the merged vertex
     * becomes the new "current" vertex, so a run of three or more numbers keeps accumulating.
     *
     * @param list vertices to rewrite in place; lists shorter than two elements are left alone
     */
    private static void mergeContinueNumIntoOne(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        final ListIterator<Vertex> cursor = list.listIterator();
        Vertex current = cursor.next();
        while (cursor.hasNext()) {
            final Vertex following = cursor.next();
            final boolean bothNumeric =
                    (TextUtility.isAllNum(current.realWord) || TextUtility.isAllChineseNum(current.realWord))
                            && (TextUtility.isAllNum(following.realWord)
                                    || TextUtility.isAllChineseNum(following.realWord));
            if (!bothNumeric) {
                current = following;
                continue;
            }
            // A fresh vertex is created rather than mutating the existing one.
            current = Vertex.newNumberInstance(current.realWord + following.realWord);
            // Rewind over both vertices, overwrite the first with the merged vertex ...
            cursor.previous();
            cursor.previous();
            cursor.set(current);
            // ... then walk forward again and drop the second one.
            cursor.next();
            cursor.next();
            cursor.remove();
        }
    }

    /**
     * Replaces two adjacent vertices with one time vertex whose word is
     * {@code vertex2.realWord + vertex.realWord}.
     *
     * <p>The iterator is expected to have just returned {@code vertex}, with {@code vertex2}
     * being the element immediately before it; the method rewinds over both, overwrites the
     * earlier one and removes the later one.
     *
     * @param listIterator iterator positioned right after {@code vertex}
     * @param vertex       the later vertex of the pair (removed from the list)
     * @param vertex2      the earlier vertex of the pair (replaced by the merged vertex)
     */
    private static void mergeDate(ListIterator<Vertex> listIterator, Vertex vertex, Vertex vertex2) {
        final String mergedWord = vertex2.realWord + vertex.realWord;
        final Vertex merged = Vertex.newTimeInstance(mergedWord);

        // Rewind so that the earlier vertex is the element last returned, then overwrite it.
        listIterator.previous();
        listIterator.previous();
        listIterator.set(merged);

        // Advance past the merged vertex and the later vertex, then delete the later one.
        listIterator.next();
        listIterator.next();
        listIterator.remove();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs {@code Viterbi.compute} over the vertex list using the core dictionary's
     * transform matrix.
     *
     * <p>NOTE(review): presumably this selects a part-of-speech for each vertex in place —
     * confirm against {@code Viterbi.compute}.
     *
     * @param list the vertices passed to the Viterbi computation
     */
    public static void speechTagging(List<Vertex> list) {
        Viterbi.compute(list, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
    }

    /**
     * Splits a vertex of the form {@code <number><separator><number>} into three vertices
     * (number, punctuation, number) when it carries nature {@link Nature#nx} and is followed by
     * a vertex that has nature {@link Nature#q} or {@link Nature#n}.
     *
     * <p>The separator is {@code Constants.ACCEPT_TIME_SEPARATOR_SERVER}; it is handed to
     * {@link String#split(String, int)} and is therefore interpreted as a regular expression.
     * NOTE(review): that constant is a push-SDK value the decompiler substituted for a literal;
     * presumably it is a plain hyphen — confirm.
     *
     * @param list vertices to rewrite in place; lists shorter than two elements are left alone
     */
    private static void splitMiddleSlashFromDigitalWords(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> listIterator = list.listIterator();
        Vertex current = listIterator.next();
        while (listIterator.hasNext()) {
            Vertex following = listIterator.next();
            if (current.getNature() == Nature.nx && (following.hasNature(Nature.q) || following.hasNature(Nature.n))) {
                // The limit must be 2: with a limit of 1 split() never applies the pattern and
                // always returns a single element, which made this whole rule unreachable.
                String[] parts = current.realWord.split(Constants.ACCEPT_TIME_SEPARATOR_SERVER, 2);
                if (parts.length == 2 && TextUtility.isAllNum(parts[0]) && TextUtility.isAllNum(parts[1])) {
                    // Work on a copy rather than mutating the existing vertex.
                    Vertex head = current.copy();
                    head.realWord = parts[0];
                    head.confirmNature(Nature.m);
                    // Rewind over `following` and `current`, replace `current` with the left
                    // number, then insert the separator and the right number after it.
                    listIterator.previous();
                    listIterator.previous();
                    listIterator.set(head);
                    listIterator.next();
                    listIterator.add(Vertex.newPunctuationInstance(Constants.ACCEPT_TIME_SEPARATOR_SERVER));
                    listIterator.add(Vertex.newNumberInstance(parts[1]));
                    // The cursor now sits just before `following`; consume it again so the next
                    // iteration pairs `following` with its real successor instead of itself.
                    listIterator.next();
                }
            }
            current = following;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Converts a segmentation path into terms and, for every word longer than two characters,
     * additionally emits the shorter words from the full word net that lie inside it.
     *
     * <p>The first and the last element of {@code list} are skipped. A sub-word is emitted when
     * it is not the long word itself, does not extend past the long word's end, and either its
     * length is at least {@code config.indexMode} or the long word's term has nature
     * {@link Nature#mq} while the sub-word has nature {@link Nature#q}. Emitted sub-words are
     * also inserted into {@code list} right after the long word.
     *
     * @param list    the chosen path; modified in place by the inserted sub-words
     * @param wordNet the full word net, queried row by row via {@code descendingIterator}
     * @return the terms in emission order, each with its {@code offset} set
     */
    public List<Term> decorateResultForIndexMode(List<Vertex> list, WordNet wordNet) {
        final List<Term> terms = new LinkedList<Term>();
        final ListIterator<Vertex> cursor = list.listIterator();
        cursor.next();

        // `line` is the 1-based word-net row at which the current word starts.
        int line = 1;
        // The count is fixed up front; vertices inserted below must not extend the walk.
        for (int remaining = list.size() - 2; remaining > 0; remaining--) {
            final Vertex vertex = cursor.next();
            final Term mainTerm = convert(vertex);
            terms.add(mainTerm);
            mainTerm.offset = line - 1;

            final int wordLength = vertex.realWord.length();
            if (wordLength > 2) {
                final int endLine = line + wordLength;
                for (int row = line; row < endLine; row++) {
                    final Iterator<Vertex> candidates = wordNet.descendingIterator(row);
                    while (candidates.hasNext()) {
                        final Vertex sub = candidates.next();
                        final boolean qualifies =
                                (mainTerm.nature == Nature.mq && sub.hasNature(Nature.q))
                                        || sub.realWord.length() >= this.config.indexMode;
                        if (!qualifies) {
                            continue;
                        }
                        // Skip the long word itself and anything reaching past its end.
                        if (sub == vertex || row + sub.realWord.length() > endLine) {
                            continue;
                        }
                        cursor.add(sub);
                        final Term subTerm = convert(sub);
                        subTerm.offset = row - 1;
                        terms.add(subTerm);
                    }
                }
            }
            line += wordLength;
        }
        return terms;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Populates a word net with every candidate word found in its text.
     *
     * <p>Three passes run in order: (1) every hit of the core dictionary trie is added;
     * (2) when {@code config.forceCustomDictionary} is set, every custom dictionary hit is
     * added; (3) each stretch of rows that is still empty is filled with the result of
     * {@code quickAtomSegment}.
     *
     * @param wordNet the word net to fill; its {@code charArray} supplies the text
     */
    public void generateWordNet(final WordNet wordNet) {
        final char[] text = wordNet.charArray;

        // Pass 1: core dictionary lookup. Word-net rows are 1-based, hence the "+ 1".
        DoubleArrayTrie<CoreDictionary.Attribute>.Searcher coreHits = CoreDictionary.trie.getSearcher(text, 0);
        while (coreHits.next()) {
            final String word = new String(text, coreHits.begin, coreHits.length);
            wordNet.add(coreHits.begin + 1, new Vertex(word, coreHits.value, coreHits.index));
        }

        // Pass 2: custom dictionary lookup, only when forced by configuration.
        if (this.config.forceCustomDictionary) {
            CustomDictionary.parseText(text, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
                @Override
                public void hit(int begin, int end, CoreDictionary.Attribute value) {
                    final String word = new String(text, begin, end - begin);
                    wordNet.add(begin + 1, new Vertex(word, value));
                }
            });
        }

        // Pass 3: fill the gaps left by the dictionaries.
        final LinkedList<Vertex>[] rows = wordNet.getVertexes();
        for (int row = 1; row < rows.length; ) {
            if (!rows[row].isEmpty()) {
                // Jump ahead by the length of the last word stored in this row.
                row += rows[row].getLast().realWord.length();
                continue;
            }
            // Extend the gap over following empty rows; the final row is never scanned.
            int gapEnd = row + 1;
            while (gapEnd < rows.length - 1 && rows[gapEnd].isEmpty()) {
                gapEnd++;
            }
            wordNet.add(row, quickAtomSegment(text, row - 1, gapEnd - 1));
            row = gapEnd;
        }
    }
}
