package com.huawei.hms.texttospeech.frontend.services.normalizers;

import com.huawei.hms.mlkit.tts.e.a;
import com.huawei.hms.texttospeech.frontend.services.TokenizedText;
import com.huawei.hms.texttospeech.frontend.services.replacers.date.AbstractDateReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.link.CommonLinkReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.mathsymbols.MathSymbolReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.money.CommonMoneyReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.number.CommonNumberReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.shortening.CapitalLettersReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.shortening.ShorteningReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.specialsymbols.CommonSpecialSymbolReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.time.AbstractTimeReplacer;
import com.huawei.hms.texttospeech.frontend.services.replacers.units.AbstractUnitReplacer;
import com.huawei.hms.texttospeech.frontend.services.tools.StringReplacer;
import com.huawei.hms.texttospeech.frontend.services.utils.StringUtils;
import com.huawei.texttospeech.frontend.services.normalizers.GermanTextNormalizer;
import com.huawei.texttospeech.frontend.services.replacers.number.italian.pattern.PhoneNumberPattern;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes2.dex */
/**
 * Base class for text normalizers: runs an input string through a fixed chain of
 * replacers, framed by regex-based pre- and post-processing.
 *
 * <p>The order applied by {@link #run(String)} is: preprocessing, acronym (shortening)
 * replacer, link replacer, then (gated) money, time and date replacers, then unit and
 * math-symbol replacers, then (gated) the number replacer, then capital-letters and
 * special-symbol replacers, and finally postprocessing.
 *
 * <p>Subclasses supply the set of characters allowed in the output via
 * {@link #initializeAllowedCharsRegex()}.
 */
public abstract class AbstractTextNormalizer implements Normalizer {
    public static final String EM_DASH = "—";
    public static final String EN_DASH = "–";
    public static final String FIGURE_DASH = "‒";
    /** Figure space, U+2007 (written as an escape because the character is invisible in source). */
    public static final String FIGURE_SPACE = "\u2007";
    public static final String HORIZONTAL_BAR = "―";
    public static final String HYPHEN_MINUS = "-";
    public static final String MINUS_SIGN = "−";
    /** Narrow no-break space, U+202F (written as an escape because the character is invisible in source). */
    public static final String NARROW_NO_BREAK_SPACE = "\u202F";
    public static final String NON_BREAKING_HYPHEN = "‑";
    /** No-break space, U+00A0 (written as an escape because the character is invisible in source). */
    public static final String NO_BREAK_SPACE = "\u00A0";
    public static final String STANDARD_APOSTROPHE = "'";
    public static final String STANDARD_QUOTE = "\"";
    public static final String THREE_EM_DASH = "⸻";
    public static final String TWO_EM_DASH = "⸺";
    /** Word joiner, U+2060. */
    public static final String WORD_JOINER = "\u2060";

    // Replacers making up the pipeline; all are injected through the constructor.
    public ShorteningReplacer acronymReplacer;
    /** Cached result of {@link #initializeAllowedCharsRegex()}, computed once in the constructor. */
    public String allowedCharsRegex;
    public CapitalLettersReplacer capitalLettersReplacer;
    public AbstractDateReplacer dateReplacer;
    public CommonLinkReplacer linkReplacer;
    public MathSymbolReplacer mathSymbolReplacer;
    public CommonMoneyReplacer moneyReplacer;
    public CommonNumberReplacer numberReplacer;
    /**
     * Pattern used by {@link #run(String)} to decide whether the money/time/date and number
     * replacers are invoked.
     *
     * <p>NOTE(review): despite the name, the constructor compiles this from {@code \D}
     * (a NON-digit character), so the gate passes for practically any text. Left unchanged
     * because subclasses may rely on the replacers always running — confirm the intent
     * before switching to a digit pattern.
     */
    public Pattern patternContainDigit;
    public CommonSpecialSymbolReplacer specialSymbolReplacer;
    public AbstractTimeReplacer timeReplacer;
    public AbstractUnitReplacer unitReplacer;

    /** Single-quote and apostrophe variants that preprocessing rewrites to {@link #STANDARD_APOSTROPHE}. */
    public static final List<String> SINGLE_QUOTES_AND_APOSTROPHES = Arrays.asList("‚", "‘", "’", "‛", "❛", "❜", "'", "＇");
    /** Double-quote variants that preprocessing rewrites to {@link #STANDARD_QUOTE}. */
    public static final List<String> DOUBLE_QUOTES = Arrays.asList("„", "“", "”", "‟", "»", "«", "»", "❝", "❞", "‹", "›", "＂", "〝", "〞", "〟");

    // The five patterns below are not referenced anywhere in this class; presumably they are
    // consumed by a subclass or replacer handling Chinese text — TODO confirm.
    public Pattern numberReg = Pattern.compile("(二)(到)(三|四|五|六|七|八|九)(分钟|趟|粒|个|小时|米|寸|纳米|微米|厘米|分米|千米|公里|吨|公斤|千克|斤|克|磅|盎司|摩尔|毫升|升|赫兹|戈瑞|分贝|比特|字节|千伏|平方米|立方米|平方|立方|摄氏度|岁|根|亿|页|块|颗|度|天|种|台|亩|户|次|瓶|份|倍|双|遍|只|箱|辆|杯|棵|卷|堆|周年|海里|毫安|厘米|朵|篇|公顷|减|加|乘|除|等于|卡路里|千卡路里|千卡卡路里|千卡|港元|泰铢|印度卢比|俄罗斯卢布|越南盾|澳门元|角|勺|英寸|公克|毫米|届|万|英里|公分|英尺|毫克|柬埔寨瑞尔|澳元|元)");
    public Pattern singNumberReg = Pattern.compile("(?<=[^零一二三四五六七八九十点比])(二)(米|毫升|千克)");
    public Pattern startWithNumberReg = Pattern.compile("(?<![一二三四五六七八九十比点]\\D{0,5})(二)(?=(百|元|米|毫升|千克|分钟|趟|粒|个|小时|寸|纳米|微米|分米|点整|千米|公里|吨|公斤|斤|克|磅|盎司|摩尔|升|赫兹|戈瑞|分贝|比特|字节|千伏|平方米|立方米|平方|立方|岁|根|亿|页|块|颗|度|天|种|台|亩|户|次|瓶|份|倍|双|遍|只|箱|辆|杯|棵|卷|堆|周年|海里|毫安|厘米|朵|篇|公顷))");
    public Pattern unitPattern = Pattern.compile("(/|／)(?=\\D{0,2}[瓶羽期连岁次克米度串册下丈丝举具美包毫厘刀分列则剂副些匝队陌陔部出个介令份伙件任倍儋亩记双发叠节茎莛荮落蓬蔸巡过进通造遍道遭对尊头套弓引张弯开庄床座庹帖帧席常幅幢口句号台只吊合名吨味响骑门间阕宗客家彪层尾届声扎打扣把抛批抔抱拨担拉抬拃挂挑挺捆掬排捧掐搭提握摊摞撇撮汪泓泡注浔派湾溜滩滴级纸线组绞统绺综缕缗场块坛垛堵堆堂塔墩回团围圈孔贴点煎熟车轮转载辆料卷截户房所扇炉炷觉斤笔本朵杆束条杯枚枝柄栋架根桄梃样株桩梭桶棵榀槽犋爿片歇手拳段沓班文曲替股肩脬腔支步武瓣秒秩钟钱铢锊铺锤锭锱章盆盏盘眉眼石码砣碗磴票罗畈番窝联缶耦粒索累緉般艘竿筥筒筹管篇箱簇角重身躯酲起趟面首项领顶颗顷袭群袋页桢天年月日时元百千万亿余字种兆]|人民币|欧元|平方|立方|公顷|升|毫升|磅|克拉|吨|加仑|[千分厘毫微纳]?米|[公]?里|英[里尺寸]|公斤|千克|盎司|分钟|秒钟|[毫微]秒|牛[顿米]?|焦耳|帕斯卡|兆|伏|安培|安时|欧姆|法拉|[毫微皮]?法|[毫微]?亨|[千兆G]?赫兹|[毫千兆]?瓦|摩尔|摄氏度|华氏度|小时|省(市)?)");
    public Pattern unitPatternRange = Pattern.compile("(?<!\\d)[+-]?\\d+(\\.\\d+)?([~～-])[+-]?\\d+(\\.\\d+)?\\s?(?=[瓶度串事册丘乘下丈丝两举具美包厘刀分列则剂副些匝队陌陔部出个介令份伙件任倍儋卖亩记双发叠节茎莛荮落蓬蔸巡过进通造遍道遭对尊头套弓引张弯开庄床座庹帖帧席常幅幢口句号台只吊合名吨度味响骑门间阕宗客家彪层尾届声扎打扣把抛批抔抱拨担拉抬拃挂挑挺捆掬排捧掐搭提握摊摞撇撮汪泓泡注浔派湾溜滩滴级纸线组绞统绺综缕缗场块坛垛堵堆堂塔墩回团围圈孔贴点煎熟车轮载辆料卷截户房所扇炉炷觉斤笔本朵杆束条杯枚枝柄栋架根桄梃样株桩梭桶棵榀槽犋爿片歇手拳段沓班文曲替股肩脬腔支步武瓣秒秩钟钱铢锊铺锤锭锱章盆盏盘眉眼石码砣碗磴票罗畈番窝联缶耦粒索累緉般艘竿筥筒筹管篇箱簇角重身躯酲起趟面首项领顶颗顷袭群袋页桢天月日时分秒人至元十百千万亿多次集位岁余里米克羽处胜负平强军师旅营行秒分字种]|千焦耳|世纪|小时|公斤|千克|盎司|[一-龥]?里|[一-龥]?米)");

    /** Ordered pattern → replacement rules applied by {@link #preprocess(TokenizedText)}. */
    public LinkedHashMap<Pattern, String> patternsPreprocessing = new LinkedHashMap<>();
    /** Ordered pattern → replacement rules applied by {@link #postprocess(TokenizedText)}. */
    public LinkedHashMap<Pattern, String> patternsPostprocessing = new LinkedHashMap<>();

    /**
     * Stores the replacers and builds the pre-/post-processing rule tables.
     *
     * <p>Note that this constructor calls the overridable methods
     * {@link #initializeAllowedCharsRegex()}, {@link #setPreprocessingPatterns()} and
     * {@link #setPostprocessingPatterns()}; overrides therefore run before the subclass's
     * own constructor body.
     *
     * @throws NullPointerException if any argument is {@code null}
     */
    public AbstractTextNormalizer(AbstractDateReplacer abstractDateReplacer, AbstractUnitReplacer abstractUnitReplacer, CommonMoneyReplacer commonMoneyReplacer, CommonNumberReplacer commonNumberReplacer, ShorteningReplacer shorteningReplacer, CapitalLettersReplacer capitalLettersReplacer, AbstractTimeReplacer abstractTimeReplacer, CommonLinkReplacer commonLinkReplacer, MathSymbolReplacer mathSymbolReplacer, CommonSpecialSymbolReplacer commonSpecialSymbolReplacer) {
        // Validate everything up front so no field is assigned when an argument is missing.
        Objects.requireNonNull(abstractDateReplacer, "abstractDateReplacer");
        Objects.requireNonNull(abstractUnitReplacer, "abstractUnitReplacer");
        Objects.requireNonNull(commonMoneyReplacer, "commonMoneyReplacer");
        Objects.requireNonNull(commonNumberReplacer, "commonNumberReplacer");
        Objects.requireNonNull(shorteningReplacer, "shorteningReplacer");
        Objects.requireNonNull(capitalLettersReplacer, "capitalLettersReplacer");
        Objects.requireNonNull(abstractTimeReplacer, "abstractTimeReplacer");
        Objects.requireNonNull(commonLinkReplacer, "commonLinkReplacer");
        Objects.requireNonNull(mathSymbolReplacer, "mathSymbolReplacer");
        Objects.requireNonNull(commonSpecialSymbolReplacer, "commonSpecialSymbolReplacer");
        this.dateReplacer = abstractDateReplacer;
        this.unitReplacer = abstractUnitReplacer;
        this.moneyReplacer = commonMoneyReplacer;
        this.numberReplacer = commonNumberReplacer;
        this.acronymReplacer = shorteningReplacer;
        this.capitalLettersReplacer = capitalLettersReplacer;
        this.timeReplacer = abstractTimeReplacer;
        this.linkReplacer = commonLinkReplacer;
        this.mathSymbolReplacer = mathSymbolReplacer;
        this.specialSymbolReplacer = commonSpecialSymbolReplacer;
        // Must be set before setPostprocessingPatterns(), which reads allowedCharsRegex().
        this.allowedCharsRegex = initializeAllowedCharsRegex();
        setPreprocessingPatterns();
        setPostprocessingPatterns();
        this.patternContainDigit = Pattern.compile("\\D");
    }

    /** Returns the allowed-characters regex fragment computed at construction time. */
    @Override // com.huawei.hms.texttospeech.frontend.services.normalizers.Normalizer
    public String allowedCharsRegex() {
        return this.allowedCharsRegex;
    }

    /**
     * Supplies the allowed-characters regex fragment. The result is embedded inside a negated
     * character class ({@code [^...]}) by {@link #setPostprocessingPatterns()}.
     */
    public abstract String initializeAllowedCharsRegex();

    /**
     * Normalizes {@code str} and returns the resulting text.
     *
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override // com.huawei.hms.texttospeech.frontend.services.normalizers.Normalizer
    public final String normalize(String str) {
        Objects.requireNonNull(str);
        return run(str).text;
    }

    /** Applies the postprocessing rules, in insertion order, to the text; returns the same instance. */
    public TokenizedText postprocess(TokenizedText tokenizedText) {
        tokenizedText.text = StringReplacer.replaceMap(tokenizedText.text, this.patternsPostprocessing);
        return tokenizedText;
    }

    /**
     * Pads the text with one leading and one trailing space, then applies the preprocessing
     * rules; returns the same instance.
     */
    public TokenizedText preprocess(TokenizedText tokenizedText) {
        // a.a(...) is an obfuscated helper; presumably it returns a StringBuilder seeded with
        // its argument — TODO confirm.
        StringBuilder padded = a.a(" ");
        padded.append(tokenizedText.text);
        padded.append(" ");
        tokenizedText.text = StringReplacer.replaceMap(padded.toString(), this.patternsPreprocessing);
        return tokenizedText;
    }

    /**
     * Runs the full normalization pipeline over {@code str}.
     *
     * <p>The money/time/date replacers and the number replacer are each gated on
     * {@link #patternContainDigit} matching the text as it stands at that point.
     *
     * @return the tokenized text after postprocessing
     */
    public TokenizedText run(String str) {
        TokenizedText current = this.linkReplacer.replace(this.acronymReplacer.replace(preprocess(new TokenizedText(str))));
        if (matchesGatePattern(current)) {
            current = this.dateReplacer.replace(this.timeReplacer.replace(this.moneyReplacer.replace(current)));
        }
        current = this.mathSymbolReplacer.replace(this.unitReplacer.replace(current));
        // Re-test the CURRENT text with a fresh matcher. Re-using one Matcher and calling
        // find() a second time would resume after the first match on the stale, pre-replacement
        // text, i.e. it would ask "is there a second match?" instead of "does it still match?".
        if (matchesGatePattern(current)) {
            current = this.numberReplacer.replace(current);
        }
        return postprocess(this.specialSymbolReplacer.replace(this.capitalLettersReplacer.replace(current)));
    }

    /** Returns whether {@link #patternContainDigit} matches anywhere in the given text. */
    private boolean matchesGatePattern(TokenizedText tokenizedText) {
        return this.patternContainDigit.matcher(tokenizedText.text).find();
    }

    /**
     * Registers the postprocessing rules. Insertion order matters: the map is a
     * {@link LinkedHashMap}, so rules are stored in the order added here.
     */
    public void setPostprocessingPatterns() {
        // NOTE(review): PhoneNumberPattern.FULL_STOP and GermanTextNormalizer.JOINER look like
        // decompiler constant-matching artifacts; their values are not visible here — confirm.
        // Runs of two or more dots.
        this.patternsPostprocessing.put(Pattern.compile("(\\.{2,})"), PhoneNumberPattern.FULL_STOP);
        // Dot/comma, question-mark/comma and exclamation-mark/comma pairs (either order).
        this.patternsPostprocessing.put(Pattern.compile("(\\.\\s?,)"), PhoneNumberPattern.FULL_STOP);
        this.patternsPostprocessing.put(Pattern.compile("(,\\s?\\.)"), PhoneNumberPattern.FULL_STOP);
        this.patternsPostprocessing.put(Pattern.compile("(\\?\\s?,)"), "? ");
        this.patternsPostprocessing.put(Pattern.compile("(,\\s?\\?)"), "? ");
        this.patternsPostprocessing.put(Pattern.compile("(!\\s?,)"), "! ");
        this.patternsPostprocessing.put(Pattern.compile("(,\\s?!)"), "! ");
        // A dot or comma directly between a word character and an upper-case ASCII letter.
        this.patternsPostprocessing.put(Pattern.compile("(?<=\\w)\\.(?=[A-Z])"), PhoneNumberPattern.FULL_STOP);
        this.patternsPostprocessing.put(Pattern.compile("(?<=\\w),(?=[A-Z])"), ", ");
        this.patternsPostprocessing.put(Pattern.compile("-"), GermanTextNormalizer.JOINER);
        // Every character outside the allowed set becomes a space.
        LinkedHashMap<Pattern, String> linkedHashMap = this.patternsPostprocessing;
        StringBuilder a2 = a.a("[^");
        a2.append(allowedCharsRegex());
        a2.append("]");
        linkedHashMap.put(Pattern.compile(a2.toString()), " ");
        // Collapse whitespace runs, then drop one leading/trailing whitespace character.
        this.patternsPostprocessing.put(Pattern.compile("\\s{2,}"), " ");
        this.patternsPostprocessing.put(Pattern.compile("(^\\s|\\s$)"), "");
    }

    /**
     * Registers the preprocessing rules. Insertion order matters: the map is a
     * {@link LinkedHashMap}, so rules are stored in the order added here.
     */
    public void setPreprocessingPatterns() {
        Locale locale = Locale.ENGLISH;
        // Special space characters and the word joiner become an ordinary space.
        this.patternsPreprocessing.put(Pattern.compile(String.format(locale, "[%s%s%s%s]", NO_BREAK_SPACE, FIGURE_SPACE, NARROW_NO_BREAK_SPACE, WORD_JOINER)), " ");
        // Long dash variants become an en dash.
        this.patternsPreprocessing.put(Pattern.compile(String.format(locale, "[%s%s%s%s%s]", EM_DASH, FIGURE_DASH, HORIZONTAL_BAR, TWO_EM_DASH, THREE_EM_DASH)), EN_DASH);
        // Non-breaking hyphen and minus sign become hyphen-minus.
        this.patternsPreprocessing.put(Pattern.compile(String.format(locale, "[%s%s]", NON_BREAKING_HYPHEN, MINUS_SIGN)), HYPHEN_MINUS);
        this.patternsPreprocessing.put(Pattern.compile("\\s{2,}"), " ");
        // Quote variants become the plain ASCII apostrophe / double quote.
        StringBuilder singleQuoteRegex = a.a("(");
        singleQuoteRegex.append(StringUtils.join("|", SINGLE_QUOTES_AND_APOSTROPHES));
        singleQuoteRegex.append(")");
        this.patternsPreprocessing.put(Pattern.compile(singleQuoteRegex.toString()), STANDARD_APOSTROPHE);
        StringBuilder doubleQuoteRegex = a.a("(");
        doubleQuoteRegex.append(StringUtils.join("|", DOUBLE_QUOTES));
        doubleQuoteRegex.append(")");
        this.patternsPreprocessing.put(Pattern.compile(doubleQuoteRegex.toString()), STANDARD_QUOTE);
        // Finally, em dash, hyphen-minus and en dash all become hyphen-minus.
        this.patternsPreprocessing.put(Pattern.compile("(—|-|–)"), HYPHEN_MINUS);
    }
}
