package org.apache.lucene.analysis.th;

import java.text.BreakIterator;
import java.util.Locale;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArrayIterator;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: classes.dex */
/**
 * Tokenizer that breaks Thai text into words with the JRE's {@link BreakIterator}.
 *
 * <p>The superclass is handed a sentence break iterator; for each sentence range passed to
 * {@link #setNextSentence(int, int)} this class runs a Thai-locale word break iterator over
 * that range and {@link #incrementWord()} emits one term per word segment. Segments whose
 * first code point is neither a letter nor a digit are skipped.
 *
 * <p>Instances can only be created when {@link #DBBI_AVAILABLE} is {@code true}.
 */
public class ThaiTokenizer extends SegmentingTokenizerBase {
    /**
     * Result of a one-time probe of the JRE's Thai word break iterator: {@code true} when it
     * reports a boundary at offset 4 of "ภาษาไทย" (the position between "ภาษา" and "ไทย").
     * When {@code false}, the constructors throw {@link UnsupportedOperationException}.
     */
    public static final boolean DBBI_AVAILABLE;

    /** Prototype Thai word break iterator; each tokenizer works on its own clone. */
    private static final BreakIterator proto = BreakIterator.getWordInstance(new Locale("th"));

    static {
        // Probe once at class-load time whether the word iterator can split unspaced Thai text.
        proto.setText("ภาษาไทย");
        DBBI_AVAILABLE = proto.isBoundary(4);
    }

    /** Prototype sentence break iterator; a clone is passed to the superclass constructor. */
    private static final BreakIterator sentenceProto = BreakIterator.getSentenceInstance(Locale.ROOT);

    private final OffsetAttribute offsetAtt;

    /** End (exclusive) of the current sentence, as an index into the inherited {@code buffer}. */
    int sentenceEnd;

    /** Start of the current sentence, as an index into the inherited {@code buffer}. */
    int sentenceStart;

    private final CharTermAttribute termAtt;

    /** Per-instance clone of {@link #proto}, re-targeted at each new sentence. */
    private final BreakIterator wordBreaker;

    /** Character-iterator view over the inherited {@code buffer} that is fed to {@link #wordBreaker}. */
    private final CharArrayIterator wrapper;

    /**
     * Creates a tokenizer using {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}.
     *
     * @throws UnsupportedOperationException if {@link #DBBI_AVAILABLE} is {@code false}
     */
    public ThaiTokenizer() {
        this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
    }

    /**
     * Creates a tokenizer whose attributes are produced by the given factory.
     *
     * @param attributeFactory factory forwarded to the superclass constructor
     * @throws UnsupportedOperationException if {@link #DBBI_AVAILABLE} is {@code false}
     */
    public ThaiTokenizer(AttributeFactory attributeFactory) {
        super(attributeFactory, (BreakIterator) sentenceProto.clone());
        this.wrapper = CharArrayIterator.newWordInstance();
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        if (!DBBI_AVAILABLE) {
            throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
        }
        this.wordBreaker = (BreakIterator) proto.clone();
    }

    /**
     * Advances to the next word of the current sentence and publishes it through the term and
     * offset attributes.
     *
     * @return {@code true} if a word was emitted, {@code false} once the word break iterator
     *     has no further boundary in the current sentence
     */
    @Override
    protected boolean incrementWord() {
        int start = wordBreaker.current();
        if (start == BreakIterator.DONE) {
            return false;
        }

        // Walk forward over segments that do not start with a letter or digit. The DONE test
        // has to stay first so that codePointAt is never evaluated once the iterator has run
        // out of boundaries.
        int end = wordBreaker.next();
        while (end != BreakIterator.DONE
                && !Character.isLetterOrDigit(Character.codePointAt(buffer, sentenceStart + start, sentenceEnd))) {
            start = end;
            end = wordBreaker.next();
        }
        if (end == BreakIterator.DONE) {
            return false;
        }

        clearAttributes();
        // start/end are relative to the sentence; shift by sentenceStart to index the buffer.
        termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
        // Offsets additionally include the inherited 'offset' before going through correctOffset.
        final int base = offset + sentenceStart;
        offsetAtt.setOffset(correctOffset(base + start), correctOffset(base + end));
        return true;
    }

    /**
     * Records the bounds of the sentence to tokenize next and points the word break iterator
     * at that slice of the inherited {@code buffer}.
     *
     * @param sentenceStart index in {@code buffer} of the first character of the sentence
     * @param sentenceEnd index in {@code buffer} one past the last character of the sentence
     */
    @Override
    protected void setNextSentence(int sentenceStart, int sentenceEnd) {
        this.sentenceStart = sentenceStart;
        this.sentenceEnd = sentenceEnd;
        wrapper.setText(buffer, sentenceStart, sentenceEnd - sentenceStart);
        wordBreaker.setText(wrapper);
    }
}
