package com.huawei.tts.voicesynthesizer.tasks;

import com.huawei.hms.mlkit.tts.b.a;
import com.huawei.texttospeech.frontend.services.normalizers.Normalizer;
import com.huawei.tts.voicesynthesizer.configuration.EnglishTextPreprocessorConfiguration;
import com.huawei.tts.voicesynthesizer.services.PhonemesExtractor;
import com.huawei.tts.voicesynthesizer.tasks.patterns.AbstractPatternApplier;
import com.huawei.tts.voicesynthesizer.tasks.patterns.english.LessSignsRangePatternApplier;
import com.huawei.tts.voicesynthesizer.tasks.patterns.english.LetterAfterDigitAsThousandPatterApplier;
import com.huawei.tts.voicesynthesizer.tasks.patterns.english.MultiplicationSignInAreaPatternApplier;
import com.huawei.tts.voicesynthesizer.tasks.patterns.english.SquareBracketsRangePatternApplier;
import com.huawei.tts.voicesynthesizer.utils.WordHashCalculator;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes2.dex */
/**
 * English-specific text preprocessor.
 *
 * <p>Before normalization ({@link #prenormalize(String)}) it runs a fixed chain of pattern
 * appliers over the text and then rewrites selected multi-letter tokens through
 * {@code pronounceSeparately(..., true)}. After normalization
 * ({@link #postnormalize(String)}) it rewrites stand-alone single letters through
 * {@code pronounceSeparately(..., false)}.
 *
 * <p>NOTE(review): {@code configuration} and {@code pronounceSeparately} are inherited from
 * {@link TextPreprocessorBase}; their exact contracts are not visible in this file.
 */
public class EnglishTextPreprocessor extends TextPreprocessorBase {
    /**
     * Matches a two-character token: one of {@code M/G/T} followed by one of
     * {@code A/B/F/H/J/K/L/N/T/V/W}. Presumably unit-of-measure abbreviations such as
     * "MB" or "GB" - confirm against the product requirements.
     */
    public static final Pattern MEASURES_PATTERN = Pattern.compile("[MGT][ABFHJKLNTVW]");

    /** Matches a single digit character. */
    public static final Pattern DIGIT_PATTERN = Pattern.compile("\\d");

    /** Matches a non-empty, upper-case Roman numeral made of the symbols M, D, C, L, X, V, I. */
    public static final Pattern ROMAN_NUMBERALS_PATTERN = Pattern.compile("^(?=[MDCLXVI])M*(C[MD]|D?C{0,3})(X[CL]|L?X{0,3})(I[XV]|V?I{0,3})$");

    /** Currency dictionary taken from the configuration; only its key set is consulted here. */
    public final Map<String, List<String>> currencyDictionary;

    /**
     * Matches one lower-cased configured letter (excluding 'a' and 'i') that is preceded by
     * start-of-input or a space and followed by a space or end-of-input. The trailing space,
     * when present, is part of the match.
     */
    public final Pattern separateLetterPattern;

    /** Matches a whole token (word-boundary delimited) of two or more configured letters. */
    public final Pattern upperCasePattern;

    /** Computes the hash that is looked up in {@link #wordsHashes}. */
    public WordHashCalculator wordHashCalculator;

    /**
     * Hashes of known words, searched with {@link Arrays#binarySearch(int[], int)}.
     * NOTE(review): binary search requires ascending order - assumed to be guaranteed by
     * the configuration; confirm.
     */
    public final int[] wordsHashes;

    /**
     * Creates the preprocessor and builds the letter patterns from the configured alphabet.
     *
     * @param normalizer forwarded to the base class
     * @param phonemesExtractor forwarded to the base class
     * @param englishTextPreprocessorConfiguration supplies word hashes, the currency dictionary
     *     and (through the inherited {@code configuration}) the letters and locale
     * @param wordHashCalculator hash function used for the known-word lookup
     */
    public EnglishTextPreprocessor(Normalizer normalizer, PhonemesExtractor phonemesExtractor, EnglishTextPreprocessorConfiguration englishTextPreprocessorConfiguration, WordHashCalculator wordHashCalculator) {
        super(normalizer, phonemesExtractor, englishTextPreprocessorConfiguration);
        this.wordHashCalculator = wordHashCalculator;
        this.wordsHashes = englishTextPreprocessorConfiguration.getWordsHashes();
        this.currencyDictionary = englishTextPreprocessorConfiguration.getCurrencyDictionary();

        // 'a' and 'i' are removed from the character class so they are never rewritten as
        // separate letters (presumably because they are ordinary English words).
        String lowerCaseLetters = this.configuration.getLetters().toLowerCase(this.configuration.getLocale());
        StringBuilder separateLetterRegex = a.a("(?<=(\\A| ))[");
        separateLetterRegex.append(lowerCaseLetters.replaceAll("[ai]", ""));
        separateLetterRegex.append("]( |\\z)");
        this.separateLetterPattern = Pattern.compile(separateLetterRegex.toString());

        StringBuilder upperCaseRegex = a.a("\\b[");
        upperCaseRegex.append(this.configuration.getLetters());
        upperCaseRegex.append("]{2,}\\b");
        this.upperCasePattern = Pattern.compile(upperCaseRegex.toString());
    }

    /**
     * Rewrites every stand-alone single letter matched by {@link #separateLetterPattern}
     * with the result of {@code pronounceSeparately(match, false)}.
     *
     * @param str text to process
     * @return the text with each match replaced; unmatched parts are copied verbatim
     */
    @Override // com.huawei.tts.voicesynthesizer.tasks.TextPreprocessorBase, com.huawei.tts.voicesynthesizer.tasks.TextPreprocessor
    public String postnormalize(String str) {
        Matcher matcher = this.separateLetterPattern.matcher(str);
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            // quoteReplacement: the replacement is literal text, so '$' and '\' must not be
            // interpreted as group references / escapes by appendReplacement.
            matcher.appendReplacement(result, Matcher.quoteReplacement(pronounceSeparately(matcher.group(), false)));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Runs the base pre-normalization, applies the English pattern appliers in a fixed order,
     * then rewrites selected multi-letter tokens with {@code pronounceSeparately(lowerCased, true)}.
     *
     * <p>A token matched by {@link #upperCasePattern} is rewritten when it is not a Roman
     * numeral and either:
     * <ul>
     *   <li>its lower-cased hash is in {@link #wordsHashes}, it matches
     *       {@link #MEASURES_PATTERN}, and the character two positions before it is not a
     *       digit (or no such position exists); or</li>
     *   <li>its hash is not in {@link #wordsHashes} and it is not a key of
     *       {@link #currencyDictionary}.</li>
     * </ul>
     * All other tokens are left unchanged.
     *
     * @param str raw input text
     * @return the pre-normalized text
     */
    @Override // com.huawei.tts.voicesynthesizer.tasks.TextPreprocessorBase, com.huawei.tts.voicesynthesizer.tasks.TextPreprocessor
    public String prenormalize(String str) {
        String text = super.prenormalize(str);

        // Appliers are instantiated per call, as before; the order is significant because each
        // one consumes the previous one's output.
        List<AbstractPatternApplier> appliers = Arrays.<AbstractPatternApplier>asList(
                new SquareBracketsRangePatternApplier(),
                new LessSignsRangePatternApplier(),
                new MultiplicationSignInAreaPatternApplier(),
                new LetterAfterDigitAsThousandPatterApplier());
        for (AbstractPatternApplier applier : appliers) {
            text = applier.replace(text);
        }

        StringBuffer result = new StringBuffer();
        Matcher matcher = this.upperCasePattern.matcher(text);
        while (matcher.find()) {
            String token = matcher.group();
            if (ROMAN_NUMBERALS_PATTERN.matcher(token).matches()) {
                continue;
            }

            String lowerCase = token.toLowerCase(this.configuration.getLocale());
            byte[] bytes = lowerCase.getBytes(StandardCharsets.UTF_8);
            int hash = this.wordHashCalculator.calculateWordHash(bytes, 0, bytes.length);
            boolean knownWord = Arrays.binarySearch(this.wordsHashes, hash) >= 0;

            boolean spellOut;
            if (knownWord) {
                // Known words are only spelled out when they look like a measure token that
                // does not follow a digit (the probe is the character two positions before
                // the token, i.e. it skips one separator character).
                int probe = matcher.start() - 2;
                spellOut = MEASURES_PATTERN.matcher(token).matches()
                        && (probe < 0 || !DIGIT_PATTERN.matcher(text.substring(probe, probe + 1)).matches());
            } else {
                spellOut = !this.currencyDictionary.containsKey(token);
            }

            if (spellOut) {
                // quoteReplacement keeps '$' and '\' in the replacement literal.
                matcher.appendReplacement(result, Matcher.quoteReplacement(pronounceSeparately(lowerCase, true)));
            }
        }
        matcher.appendTail(result);
        return result.toString();
    }
}
