package com.twitter.penguin.korean.tokenizer;

import com.twitter.Regex;
import com.twitter.penguin.korean.tokenizer.KoreanChunker;
import com.twitter.penguin.korean.tokenizer.KoreanTokenizer;
import com.twitter.penguin.korean.util.KoreanPos$;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import scala.Enumeration;
import scala.MatchError;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.immutable.StringOps$;
import scala.collection.mutable.ListBuffer;
import scala.math.Ordering$Int$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.RichChar$;

/* compiled from: KoreanChunker.scala */
/* loaded from: classes46.dex */
public final class KoreanChunker$ {
    /**
     * The single instance of this class. It is assigned by the private constructor, which the
     * static initializer below invokes once.
     * NOTE(review): the {@code = null} initializer combined with the assignment in the
     * constructor looks like a decompiler rendering of the Scala module pattern - confirm
     * before attempting to recompile this file.
     */
    public static final KoreanChunker$ MODULE$ = null;

    /** The POS tags whose patterns are applied when splitting a chunk, in the order they are tried. */
    private final Seq<Enumeration.Value> CHUNKING_ORDER;

    /** Maps each POS tag to the compiled regular expression that recognizes chunks of that kind. */
    private final Map<Enumeration.Value, Pattern> com$twitter$penguin$korean$tokenizer$KoreanChunker$$POS_PATTERNS;

    // Instantiates the class once at load time; the constructor stores the instance in MODULE$.
    static {
        new KoreanChunker$();
    }

    private KoreanChunker$() {
        MODULE$ = this;
        this.com$twitter$penguin$korean$tokenizer$KoreanChunker$$POS_PATTERNS = (Map) Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Korean()), new StringOps(Predef$.MODULE$.augmentString("([가-힣]+)")).r().pattern()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Alpha()), new StringOps(Predef$.MODULE$.augmentString("(\\p{Alpha}+)")).r().pattern()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Number()), new StringOps(Predef$.MODULE$.augmentString("(\\$?\\p{Digit}+(,\\p{Digit}{3})*([/~:\\.-]\\p{Digit}+)?(천|만|억|조)*(%|원|달러|위안|옌|엔|유로|등|년|월|일|회|시간|시|분|초)?)")).r().pattern()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.KoreanParticle()), new StringOps(Predef$.MODULE$.augmentString("([ㄱ-ㅣ]+)")).r().pattern()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Punctuation()), new StringOps(Predef$.MODULE$.augmentString("([\\p{Punct}·…’]+)")).r().pattern()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.URL()), Regex.VALID_URL), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Email()), new StringOps(Predef$.MODULE$.augmentString("([\\p{Alnum}\\.\\-_]+@[\\p{Alnum}\\.]+)")).r().pattern()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Hashtag()), Regex.VALID_HASHTAG), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.ScreenName()), Regex.VALID_MENTION_OR_LIST), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.CashTag()), Regex.VALID_CASHTAG), 
Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(KoreanPos$.MODULE$.Space()), new StringOps(Predef$.MODULE$.augmentString("\\s+")).r().pattern())}));
        this.CHUNKING_ORDER = (Seq) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Enumeration.Value[]{KoreanPos$.MODULE$.URL(), KoreanPos$.MODULE$.Email(), KoreanPos$.MODULE$.ScreenName(), KoreanPos$.MODULE$.Hashtag(), KoreanPos$.MODULE$.CashTag(), KoreanPos$.MODULE$.Number(), KoreanPos$.MODULE$.Korean(), KoreanPos$.MODULE$.KoreanParticle(), KoreanPos$.MODULE$.Alpha(), KoreanPos$.MODULE$.Punctuation()}));
    }

    /**
     * Accessor for the chunking order.
     *
     * @return the sequence of POS tags stored by the constructor
     */
    private Seq<Enumeration.Value> CHUNKING_ORDER() {
        return CHUNKING_ORDER;
    }

    /**
     * Folds the given matches into a list and appends a trailing chunk for any text left over
     * at the end of {@code str}.
     *
     * <p>The fold starts from an empty list and offset 0 and yields a (list, offset) pair.
     * NOTE(review): the folding function is defined outside this file; presumably it inserts
     * filler chunks for gaps between consecutive matches and advances the offset - confirm.
     *
     * @param str   the text the matches were found in
     * @param seq   the matches to fold over
     * @param value the POS tag given to the trailing filler chunk (and passed to the folding function)
     * @return the folded list, reversed
     * @throws MatchError if the fold produces {@code null}
     */
    private List<KoreanChunker.ChunkMatch> fillInUnmatched(String str, Seq<KoreanChunker.ChunkMatch> seq, Enumeration.Value value) {
        final Tuple2 seed = new Tuple2(Nil$.MODULE$, BoxesRunTime.boxToInteger(0));
        final Tuple2 folded = (Tuple2) seq.foldLeft(seed, new KoreanChunker$$anonfun$2(str, value));
        if (folded == null) {
            throw new MatchError(folded);
        }

        List accumulated = (List) folded.mo40_1();
        final int consumed = folded._2$mcI$sp();
        final int textLength = str.length();

        // Anything past the final offset was not covered: add it as one more chunk.
        if (consumed < textLength) {
            final String tail = StringOps$.MODULE$.slice$extension(Predef$.MODULE$.augmentString(str), consumed, textLength);
            accumulated = accumulated.$colon$colon(new KoreanChunker.ChunkMatch(consumed, textLength, tail, value));
        }

        // Chunks are prepended while accumulating, so reverse before returning.
        return accumulated.reverse();
    }

    /**
     * Collects every remaining match of {@code matcher} as a {@code ChunkMatch}.
     *
     * <p>Each match is prepended to the accumulator, so later matches come before earlier ones
     * in the returned list.
     *
     * @param matcher the matcher to exhaust; it is advanced with {@code find()} until no match remains
     * @param value   the POS tag stored in every created match
     * @param list    the matches collected so far
     * @return {@code list} with one entry prepended per match found
     */
    private List<KoreanChunker.ChunkMatch> findAllPatterns(Matcher matcher, Enumeration.Value value, List<KoreanChunker.ChunkMatch> list) {
        List<KoreanChunker.ChunkMatch> collected = list;
        for (boolean found = matcher.find(); found; found = matcher.find()) {
            final KoreanChunker.ChunkMatch hit =
                    new KoreanChunker.ChunkMatch(matcher.start(), matcher.end(), matcher.group(), value);
            collected = collected.$colon$colon(hit);
        }
        return collected;
    }

    /**
     * Default value for the third parameter of {@code findAllPatterns}.
     *
     * @return the empty list
     */
    private List<KoreanChunker.ChunkMatch> findAllPatterns$default$3() {
        return Nil$.MODULE$;
    }

    /**
     * Splits the input into tokens.
     *
     * <p>The text is first cut into alternating whitespace and non-whitespace pieces, each piece
     * is expanded by a flat-map step, and the expanded elements are folded into a
     * (token list, offset) pair starting from an empty list and offset 0.
     * NOTE(review): the flat-map and fold functions are defined outside this file; presumably
     * the former splits each piece into chunk matches and the latter converts those into tokens
     * positioned relative to the whole input - confirm.
     *
     * @param charSequence the text to chunk
     * @return the folded token list, reversed
     * @throws MatchError if the fold yields {@code null} or a pair whose list is {@code null}
     */
    public Seq<KoreanTokenizer.KoreanToken> chunk(CharSequence charSequence) {
        final String text = charSequence.toString();

        final TraversableOnce expanded = (TraversableOnce) splitBySpaceKeepingSpace(text)
                .flatMap(new KoreanChunker$$anonfun$3(), Seq$.MODULE$.canBuildFrom());
        final Tuple2 seed = new Tuple2(Nil$.MODULE$, BoxesRunTime.boxToInteger(0));
        final Tuple2 folded = (Tuple2) expanded.foldLeft(seed, new KoreanChunker$$anonfun$4(text));

        if (folded == null) {
            throw new MatchError(folded);
        }
        final List tokens = (List) folded.mo40_1();
        if (tokens == null) {
            throw new MatchError(folded);
        }

        // Only the list half of the pair is needed; it was built by prepending, so reverse it.
        return tokens.reverse();
    }

    /**
     * Accessor for the POS-to-pattern table.
     *
     * @return the map from POS tag to compiled pattern stored by the constructor
     */
    public Map<Enumeration.Value, Pattern> com$twitter$penguin$korean$tokenizer$KoreanChunker$$POS_PATTERNS() {
        return com$twitter$penguin$korean$tokenizer$KoreanChunker$$POS_PATTERNS;
    }

    /**
     * Splits one piece of text into typed chunk matches.
     *
     * <p>An empty string yields an empty list. If the first character is a space character, the
     * whole string becomes a single {@code Space} match. Otherwise each POS tag in the chunking
     * order is visited to collect matches into a buffer, the buffer is sorted by an integer key,
     * and the uncovered remainder is filled in as {@code Foreign} chunks.
     * NOTE(review): the per-tag collector and the sort key are defined outside this file;
     * presumably the collector records pattern matches and the key is the match start offset -
     * confirm.
     * NOTE(review): the space check is {@code isSpaceChar} on the first character, which may not
     * agree with the {@code \s+} regex for characters such as tab or newline - verify against
     * callers before relying on the {@code Space} tag for those.
     *
     * @param str the text to split
     * @return the chunk matches covering {@code str}; empty when {@code str} is empty
     */
    public List<KoreanChunker.ChunkMatch> com$twitter$penguin$korean$tokenizer$KoreanChunker$$splitChunks(String str) {
        // charAt(0) below would throw StringIndexOutOfBoundsException on "", so answer directly.
        if (str.isEmpty()) {
            return List$.MODULE$.empty();
        }
        if (RichChar$.MODULE$.isSpaceChar$extension(Predef$.MODULE$.charWrapper(str.charAt(0)))) {
            return List$.MODULE$.apply((Seq) Predef$.MODULE$.wrapRefArray(new KoreanChunker.ChunkMatch[]{new KoreanChunker.ChunkMatch(0, str.length(), str, KoreanPos$.MODULE$.Space())}));
        }
        ListBuffer listBuffer = new ListBuffer();
        // The collector receives the text, the shared buffer and a mutable int cell starting at 0.
        CHUNKING_ORDER().foreach(new KoreanChunker$$anonfun$com$twitter$penguin$korean$tokenizer$KoreanChunker$$splitChunks$1(str, listBuffer, IntRef.create(0)));
        return fillInUnmatched(str, ((ListBuffer) listBuffer.sortBy(new KoreanChunker$$anonfun$1(), Ordering$Int$.MODULE$)).toList(), KoreanPos$.MODULE$.Foreign());
    }

    /**
     * Chunks the input and maps every resulting token to a string.
     * NOTE(review): the mapping function is defined outside this file; presumably it extracts
     * each token's text - confirm.
     *
     * @param str the text to chunk
     * @param z   not read by this implementation; the result is the same for either value
     * @return one string per token produced by {@code chunk}
     */
    public Seq<String> getChunks(String str, boolean z) {
        final Seq<KoreanTokenizer.KoreanToken> tokens = chunk(str);
        return (Seq) tokens.map(new KoreanChunker$$anonfun$getChunks$1(), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Default value for the second parameter of {@code getChunks}.
     *
     * @return {@code false}
     */
    public boolean getChunks$default$2() {
        return false;
    }

    /**
     * Chunks the input and keeps only the tokens accepted by a predicate built from {@code value}.
     * NOTE(review): the predicate is defined outside this file; presumably it keeps tokens whose
     * POS tag equals {@code value} - confirm.
     *
     * @param str   the text to chunk
     * @param value the POS tag handed to the filter predicate
     * @return the tokens from {@code chunk} that pass the filter
     */
    public Seq<KoreanTokenizer.KoreanToken> getChunksByPos(String str, Enumeration.Value value) {
        final Seq<KoreanTokenizer.KoreanToken> tokens = chunk(str);
        return (Seq) tokens.filter(new KoreanChunker$$anonfun$getChunksByPos$1(value));
    }

    /** Matches one run of whitespace; compiled once instead of on every call. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Cuts the input into alternating non-whitespace and whitespace pieces, keeping the
     * whitespace runs as elements of the result.
     *
     * <p>Pieces appear in their original order and, concatenated, reproduce the input. No piece
     * is empty; an empty input yields an empty sequence.
     *
     * @param charSequence the text to split
     * @return the pieces of {@code charSequence}, whitespace runs included
     */
    public Seq<String> splitBySpaceKeepingSpace(CharSequence charSequence) {
        final Matcher matcher = WHITESPACE_RUN.matcher(charSequence);
        final ListBuffer<String> pieces = new ListBuffer<String>();
        int consumed = 0;
        while (matcher.find()) {
            // Text between the previous whitespace run (or the start) and this one.
            if (consumed < matcher.start()) {
                pieces.$plus$eq(charSequence.subSequence(consumed, matcher.start()).toString());
            }
            // The whitespace run itself is kept as its own piece.
            pieces.$plus$eq(charSequence.subSequence(matcher.start(), matcher.end()).toString());
            consumed = matcher.end();
        }
        // Trailing text after the last whitespace run.
        if (consumed < charSequence.length()) {
            pieces.$plus$eq(charSequence.subSequence(consumed, charSequence.length()).toString());
        }
        return pieces.toList();
    }
}
