package sec.bdc.tm.hte.ko.kpepipeline;

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import sec.bdc.nlp.ds.Sentence;
import sec.bdc.nlp.ds.Token;
import sec.bdc.nlp.ds.Word;
import sec.bdc.nlp.tokenizer.intf.Tokenizer;
import sec.bdc.tm.hte.common.intf.subpipeline.Preprocessor;
import sec.bdc.tm.hte.ko.kpepipeline.preprocess.LexerUnitSplitter;
import sec.bdc.tm.hte.ko.kpepipeline.preprocess.Unit;
import sec.bdc.tm.hte.ko.kpepipeline.preprocess.UnitType;
import sec.bdc.tm.hte.ko.kpepipeline.ss.LangthBasedLexerSentenceSplitter;

/* loaded from: classes49.dex */
/**
 * {@link Preprocessor} implementation that splits raw text into sentences, splits each
 * sentence into lexer units, and turns every unit into a {@link Word}.
 *
 * <p>Units of type {@link UnitType#HANGUL} are handed to the injected {@link Tokenizer};
 * every other unit becomes a single-token word whose POS tag is derived from its unit type.
 */
public class PreprocessorKo implements Preprocessor {
    /** Tokenizer used for HANGUL units and for whole-sentence tokenization. */
    private final Tokenizer tokenizer;

    /**
     * Creates a preprocessor backed by the given tokenizer.
     *
     * @param tokenizer tokenizer used by {@link #doProcess} and {@link #doProcessSentence}
     */
    public PreprocessorKo(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Maps a non-HANGUL unit to its POS tag based on the unit's type.
     *
     * <p>Switches directly on the enum instead of going through an ordinal-indexed lookup
     * table; the mapping itself is unchanged. Types without a dedicated tag (including
     * SPACE and HANGUL) fall back to {@code "NN"}.
     *
     * @param unit the unit to tag
     * @return the POS tag string for the unit
     */
    private String getPosTag(Unit unit) {
        switch (unit.getUnitType()) {
            case HANMUN:
                return "SH";
            case ENGLISH:
            case LETTER:
                return "SL";
            case NUMBER:
                return "SN";
            case SYMBOL:
                // The tag of a symbol unit depends on its first character only.
                return getSymbolPosTag(unit.getWord()[0]);
            case IP:
            case URL:
            case EMAIL:
                return "URL";
            case EMOTICON:
                return "SW";
            case ETC:
                return "NA";
            case JAMO:
            case THING:
            case TIME:
            default:
                return "NN";
        }
    }

    /**
     * Returns the POS tag for a single symbol character.
     *
     * @param c the symbol character
     * @return {@code "SF"} for {@code ! . ?}, {@code "SS"} for quotes and brackets,
     *         {@code "SP"} for {@code , : ;}, and {@code "SW"} for anything else
     */
    private static String getSymbolPosTag(char c) {
        switch (c) {
            case '!':
            case '.':
            case '?':
                return "SF";
            case '\"':
            case '\'':
            case '(':
            case ')':
            case '[':
            case ']':
            case '{':
            case '}':
                return "SS";
            case ',':
            case ':':
            case ';':
                return "SP";
            default:
                return "SW";
        }
    }

    /**
     * Collapses runs of CR/LF into single line breaks and then keeps only every
     * {@code i}-th line break, replacing the others with a single space.
     *
     * <p>Input that contains no {@code '\n'} is returned unchanged.
     *
     * @param str the text to process
     * @param i   keep one line break out of every {@code i}; must not be zero
     * @return the text with reduced line breaks
     * @throws IllegalArgumentException if {@code i} is zero and {@code str} contains a line break
     */
    private String reduceNewlines(String str, int i) {
        if (!str.contains("\n")) {
            return str;
        }
        if (i == 0) {
            // Without this guard the modulo below fails with an opaque ArithmeticException.
            throw new IllegalArgumentException("newline interval must not be zero");
        }
        String collapsed = str.replaceAll("[\\r\\n]+", "\n");
        StringBuilder sb = new StringBuilder(collapsed.length());
        int newlineCount = 0;
        int segmentStart = 0;
        int newlineIndex = collapsed.indexOf('\n');
        while (newlineIndex != -1) {
            newlineCount++;
            sb.append(collapsed, segmentStart, newlineIndex);
            if (newlineCount % i == 0) {
                sb.append("\n");
            } else {
                sb.append(StringUtils.SPACE);
            }
            segmentStart = newlineIndex + 1;
            newlineIndex = collapsed.indexOf('\n', segmentStart);
        }
        // Tail after the last line break.
        sb.append(collapsed, segmentStart, collapsed.length());
        return sb.toString();
    }

    /**
     * Splits the text into sentences and populates each sentence's word list.
     *
     * <p>For every lexer unit of a sentence: a HANGUL unit is tokenized, tokens tagged
     * {@code "UN"} are retagged {@code "NN"}, and each token position is shifted by the
     * unit's start offset; any other unit becomes a one-token word tagged via
     * {@link #getPosTag(Unit)}.
     *
     * @param str the raw input text
     * @return the sentences produced by the sentence splitter, each with its word list set
     */
    @Override // sec.bdc.tm.hte.common.intf.subpipeline.Preprocessor
    public List<Sentence> doProcess(String str) {
        List<Sentence> sentences = new LangthBasedLexerSentenceSplitter().split(str);
        LexerUnitSplitter unitSplitter = new LexerUnitSplitter();
        for (Sentence sentence : sentences) {
            List<Unit> units = unitSplitter.split(sentence.getRawText().toCharArray());
            List<Word> words = new ArrayList<Word>();
            for (Unit unit : units) {
                if (unit.getUnitType() == UnitType.HANGUL) {
                    List<Token> tokens = this.tokenizer.tokenizeSentenceWithoutSpace(unit.getString());
                    for (Token token : tokens) {
                        // Constant-first comparison so a token without a tag does not throw.
                        if ("UN".equals(token.getPosTag())) {
                            token.setPosTag("NN");
                        }
                        // Tokenizer positions are relative to the unit text; shift by the unit start.
                        token.setPosition(token.getPosition() + unit.start);
                    }
                    words.add(new Word(unit.start, unit.getString(), tokens));
                } else {
                    words.add(new Word(new Token(unit.start, unit.getString(), getPosTag(unit))));
                }
            }
            sentence.setWordList(words);
        }
        return sentences;
    }

    /**
     * Wraps the whole input in a single {@link Sentence} whose word list comes straight
     * from the tokenizer, without sentence or unit splitting.
     *
     * @param str the raw sentence text
     * @return a sentence built from {@code str} with the tokenizer's word list attached
     */
    @Override // sec.bdc.tm.hte.common.intf.subpipeline.Preprocessor
    public Sentence doProcessSentence(String str) {
        List<Word> words = this.tokenizer.tokenizeSentenceWithoutSpaceToWordList(str);
        Sentence sentence = new Sentence(str);
        sentence.setWordList(words);
        return sentence;
    }
}
