package com.hankcs.hanlp.mining.word2vec;

import com.hankcs.hanlp.utility.Predefine;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Comparator;
import java.util.Locale;

/* loaded from: classes.dex */
class Word2VecTraining {
    // NOTE(review): not referenced by name in the visible code; the static initializer fills
    // expTable using the literal 1000, which presumably is this constant inlined by the compiler.
    static final int EXP_TABLE_SIZE = 1000;
    // NOTE(review): not referenced by name in the visible code; the static initializer scales
    // its exponent by the literal 6.0, which presumably is this constant inlined by the compiler.
    static final int MAX_EXP = 6;
    // NOTE(review): unused in the visible code; presumably consumed by the worker thread's
    // run() method, whose body was not decompiled.
    static final int MAX_SENTENCE_LENGTH = 1000;
    // Number of slots in the sampling table allocated by initUnigramTable().
    static final int TABLE_SIZE = 100000000;
    // Weight matrix stored row-major (row = vocabulary index, layer1Size columns). Allocated and
    // randomly initialised by initNet(); written to the output file by trainModel().
    // Static, so it is shared by every Word2VecTraining instance in the JVM.
    static double[] syn0;
    // Allocated by initNet() only when config.useHierarchicalSoftmax() is true; released
    // (set to null) by trainModel() once the worker threads have finished.
    static double[] syn1;
    // Allocated by initNet() only when config.getNegative() > 0.
    static double[] syn1neg;
    // Training configuration supplied to the constructor.
    private final Config config;
    // Sampling table built by initUnigramTable(); each slot holds a vocabulary index.
    // Stays null unless config.getNegative() > 0, and is reset to null after training.
    int[] table;
    // Set to config.getNumThreads() before the workers are started; trainModel() waits on this
    // object's monitor until the value is no longer positive.
    int threadCount;
    // Wall-clock time (System.currentTimeMillis()) captured right before the workers start.
    long timeStart;
    // Character encoding used when writing the output vector file.
    static final Charset ENCODING = Charset.forName("UTF-8");
    // Precomputed logistic values e^x / (e^x + 1); slots 0..999 are filled by the static
    // initializer, slot 1000 is never written and therefore stays 0.0.
    static final double[] expTable = new double[1001];

    /* loaded from: classes.dex */
    static class TrainModelThread extends Thread {
        // Static counter shared by all worker threads.
        // NOTE(review): its use is inside run(), which was not decompiled — confirm how it is
        // updated and whether those updates are synchronised.
        static int wordCountActual;
        // Current learning rate; starts at config.getAlpha().
        float alpha;
        // Training configuration handed to the constructor.
        final Config config;
        // Corpus instance this worker reads from.
        final Corpus corpus;
        // Index of this worker, as passed to the constructor.
        final int id;
        // Copy of the initial learning rate (config.getAlpha()) taken at construction time.
        final float startingAlpha;
        // Reference to the owning trainer's sampling table, captured at construction time
        // (null if the trainer had not built one).
        final int[] table;
        // Training start time copied from the owning trainer.
        final long timeStart;
        // corpus.getTrainWords() captured at construction time and stored as a float.
        final float trainWords;
        // The trainer that created this worker.
        final Word2VecTraining vec;
        // Vocabulary array obtained from corpus.getVocab().
        final VocabWord[] vocab;
        // Vocabulary size obtained from corpus.getVocabSize().
        final int vocabSize;

        /**
         * Creates a worker bound to the given trainer and corpus.
         *
         * <p>Values read from {@code config}, {@code corpus} and {@code word2VecTraining}
         * (learning rate, sampling table, start time, vocabulary and word counts) are
         * snapshotted here, so later changes to those objects' fields are not observed
         * through this worker's own fields.
         *
         * @param word2VecTraining the trainer that owns this worker
         * @param corpus           the corpus this worker will read
         * @param config           the training configuration
         * @param i                the index assigned to this worker
         */
        public TrainModelThread(Word2VecTraining word2VecTraining, Corpus corpus, Config config, int i) {
            // The configured learning rate seeds both the mutable rate and its reference copy.
            final float initialAlpha = config.getAlpha();
            startingAlpha = initialAlpha;
            alpha = initialAlpha;

            // Snapshot state from the trainer and the corpus.
            table = word2VecTraining.table;
            trainWords = corpus.getTrainWords();
            timeStart = word2VecTraining.timeStart;
            vocabSize = corpus.getVocabSize();
            vocab = corpus.getVocab();

            // Plain references and identity.
            vec = word2VecTraining;
            this.corpus = corpus;
            this.config = config;
            id = i;
        }

        /* JADX WARN: Removed duplicated region for block: B:130:0x035c A[Catch: IOException -> 0x0613, LOOP:13: B:129:0x035a->B:130:0x035c, LOOP_END, TryCatch #1 {IOException -> 0x0613, blocks: (B:3:0x0035, B:4:0x0053, B:6:0x005d, B:8:0x0087, B:10:0x00d6, B:11:0x00e2, B:12:0x00fb, B:14:0x011a, B:19:0x0132, B:38:0x019f, B:278:0x05c4, B:283:0x05d9, B:280:0x05f3, B:40:0x01b3, B:50:0x01cd, B:54:0x01d5, B:56:0x01da, B:59:0x01f1, B:62:0x01ff, B:67:0x0208, B:72:0x022f, B:75:0x0213, B:77:0x0224, B:85:0x0243, B:90:0x0256, B:92:0x025e, B:94:0x026d, B:100:0x02ce, B:104:0x0288, B:106:0x02a9, B:110:0x02bd, B:116:0x02da, B:120:0x0318, B:122:0x031f, B:126:0x0332, B:127:0x0337, B:130:0x035c, B:134:0x0370, B:138:0x0383, B:141:0x0340, B:142:0x0348, B:143:0x02e9, B:145:0x0301, B:156:0x039d, B:163:0x03c4, B:164:0x03a8, B:170:0x03b1, B:176:0x058c, B:189:0x03e5, B:192:0x03ee, B:197:0x03f9, B:200:0x0400, B:202:0x0405, B:206:0x040d, B:208:0x0415, B:210:0x0426, B:216:0x0497, B:220:0x0449, B:222:0x046e, B:226:0x0482, B:232:0x04b0, B:236:0x04e5, B:238:0x04ec, B:242:0x0503, B:243:0x0508, B:246:0x052f, B:250:0x0543, B:254:0x055a, B:257:0x0512, B:258:0x051a, B:259:0x04bc, B:261:0x04d1, B:269:0x0561, B:271:0x057a, B:23:0x013e, B:27:0x0149, B:33:0x017f, B:306:0x00e8), top: B:2:0x0035 }] */
        /* JADX WARN: Removed duplicated region for block: B:134:0x0370 A[Catch: IOException -> 0x0613, LOOP:14: B:133:0x036e->B:134:0x0370, LOOP_END, TryCatch #1 {IOException -> 0x0613, blocks: (B:3:0x0035, B:4:0x0053, B:6:0x005d, B:8:0x0087, B:10:0x00d6, B:11:0x00e2, B:12:0x00fb, B:14:0x011a, B:19:0x0132, B:38:0x019f, B:278:0x05c4, B:283:0x05d9, B:280:0x05f3, B:40:0x01b3, B:50:0x01cd, B:54:0x01d5, B:56:0x01da, B:59:0x01f1, B:62:0x01ff, B:67:0x0208, B:72:0x022f, B:75:0x0213, B:77:0x0224, B:85:0x0243, B:90:0x0256, B:92:0x025e, B:94:0x026d, B:100:0x02ce, B:104:0x0288, B:106:0x02a9, B:110:0x02bd, B:116:0x02da, B:120:0x0318, B:122:0x031f, B:126:0x0332, B:127:0x0337, B:130:0x035c, B:134:0x0370, B:138:0x0383, B:141:0x0340, B:142:0x0348, B:143:0x02e9, B:145:0x0301, B:156:0x039d, B:163:0x03c4, B:164:0x03a8, B:170:0x03b1, B:176:0x058c, B:189:0x03e5, B:192:0x03ee, B:197:0x03f9, B:200:0x0400, B:202:0x0405, B:206:0x040d, B:208:0x0415, B:210:0x0426, B:216:0x0497, B:220:0x0449, B:222:0x046e, B:226:0x0482, B:232:0x04b0, B:236:0x04e5, B:238:0x04ec, B:242:0x0503, B:243:0x0508, B:246:0x052f, B:250:0x0543, B:254:0x055a, B:257:0x0512, B:258:0x051a, B:259:0x04bc, B:261:0x04d1, B:269:0x0561, B:271:0x057a, B:23:0x013e, B:27:0x0149, B:33:0x017f, B:306:0x00e8), top: B:2:0x0035 }] */
        /* JADX WARN: Removed duplicated region for block: B:178:0x0594 A[SYNTHETIC] */
        /* JADX WARN: Removed duplicated region for block: B:181:0x05a9 A[SYNTHETIC] */
        /* JADX WARN: Removed duplicated region for block: B:246:0x052f A[Catch: IOException -> 0x0613, LOOP:25: B:245:0x052d->B:246:0x052f, LOOP_END, TryCatch #1 {IOException -> 0x0613, blocks: (B:3:0x0035, B:4:0x0053, B:6:0x005d, B:8:0x0087, B:10:0x00d6, B:11:0x00e2, B:12:0x00fb, B:14:0x011a, B:19:0x0132, B:38:0x019f, B:278:0x05c4, B:283:0x05d9, B:280:0x05f3, B:40:0x01b3, B:50:0x01cd, B:54:0x01d5, B:56:0x01da, B:59:0x01f1, B:62:0x01ff, B:67:0x0208, B:72:0x022f, B:75:0x0213, B:77:0x0224, B:85:0x0243, B:90:0x0256, B:92:0x025e, B:94:0x026d, B:100:0x02ce, B:104:0x0288, B:106:0x02a9, B:110:0x02bd, B:116:0x02da, B:120:0x0318, B:122:0x031f, B:126:0x0332, B:127:0x0337, B:130:0x035c, B:134:0x0370, B:138:0x0383, B:141:0x0340, B:142:0x0348, B:143:0x02e9, B:145:0x0301, B:156:0x039d, B:163:0x03c4, B:164:0x03a8, B:170:0x03b1, B:176:0x058c, B:189:0x03e5, B:192:0x03ee, B:197:0x03f9, B:200:0x0400, B:202:0x0405, B:206:0x040d, B:208:0x0415, B:210:0x0426, B:216:0x0497, B:220:0x0449, B:222:0x046e, B:226:0x0482, B:232:0x04b0, B:236:0x04e5, B:238:0x04ec, B:242:0x0503, B:243:0x0508, B:246:0x052f, B:250:0x0543, B:254:0x055a, B:257:0x0512, B:258:0x051a, B:259:0x04bc, B:261:0x04d1, B:269:0x0561, B:271:0x057a, B:23:0x013e, B:27:0x0149, B:33:0x017f, B:306:0x00e8), top: B:2:0x0035 }] */
        /* JADX WARN: Removed duplicated region for block: B:250:0x0543 A[Catch: IOException -> 0x0613, LOOP:26: B:249:0x0541->B:250:0x0543, LOOP_END, TryCatch #1 {IOException -> 0x0613, blocks: (B:3:0x0035, B:4:0x0053, B:6:0x005d, B:8:0x0087, B:10:0x00d6, B:11:0x00e2, B:12:0x00fb, B:14:0x011a, B:19:0x0132, B:38:0x019f, B:278:0x05c4, B:283:0x05d9, B:280:0x05f3, B:40:0x01b3, B:50:0x01cd, B:54:0x01d5, B:56:0x01da, B:59:0x01f1, B:62:0x01ff, B:67:0x0208, B:72:0x022f, B:75:0x0213, B:77:0x0224, B:85:0x0243, B:90:0x0256, B:92:0x025e, B:94:0x026d, B:100:0x02ce, B:104:0x0288, B:106:0x02a9, B:110:0x02bd, B:116:0x02da, B:120:0x0318, B:122:0x031f, B:126:0x0332, B:127:0x0337, B:130:0x035c, B:134:0x0370, B:138:0x0383, B:141:0x0340, B:142:0x0348, B:143:0x02e9, B:145:0x0301, B:156:0x039d, B:163:0x03c4, B:164:0x03a8, B:170:0x03b1, B:176:0x058c, B:189:0x03e5, B:192:0x03ee, B:197:0x03f9, B:200:0x0400, B:202:0x0405, B:206:0x040d, B:208:0x0415, B:210:0x0426, B:216:0x0497, B:220:0x0449, B:222:0x046e, B:226:0x0482, B:232:0x04b0, B:236:0x04e5, B:238:0x04ec, B:242:0x0503, B:243:0x0508, B:246:0x052f, B:250:0x0543, B:254:0x055a, B:257:0x0512, B:258:0x051a, B:259:0x04bc, B:261:0x04d1, B:269:0x0561, B:271:0x057a, B:23:0x013e, B:27:0x0149, B:33:0x017f, B:306:0x00e8), top: B:2:0x0035 }] */
        /**
         * Worker entry point.
         *
         * <p>The decompiler failed to reconstruct this method, so the body below is only a
         * placeholder that always throws {@link UnsupportedOperationException}. The real
         * training loop must be restored from the original sources or the instruction dump
         * before this class can be used.
         *
         * <p>NOTE(review): {@code Word2VecTraining.trainModel()} starts these threads and then
         * waits until {@code threadCount} is no longer positive. Nothing in the visible code
         * decrements that counter, so with this placeholder body the caller would presumably
         * wait forever — confirm against the original implementation.
         *
         * @throws UnsupportedOperationException always, in this decompiled form
         */
        @Override // java.lang.Thread, java.lang.Runnable
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        public void run() {
            /*
                Method dump skipped, instructions count: 1562
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: com.hankcs.hanlp.mining.word2vec.Word2VecTraining.TrainModelThread.run():void");
        }
    }

    /* loaded from: classes.dex */
    /**
     * Orders vocabulary entries by their {@code f658cn} field in descending order
     * (larger values sort first).
     */
    static class VocabWordComparator implements Comparator<VocabWord> {
        VocabWordComparator() {
        }

        /**
         * Compares two entries so that the one with the larger {@code f658cn} comes first.
         *
         * @param vocabWord  the first entry
         * @param vocabWord2 the second entry
         * @return a negative value if {@code vocabWord} has the larger {@code f658cn}, a positive
         *         value if {@code vocabWord2} has, and zero if they are equal
         */
        @Override // java.util.Comparator
        public int compare(VocabWord vocabWord, VocabWord vocabWord2) {
            // Integer.compare instead of subtraction: a difference of two ints can overflow
            // and flip sign, which would violate the Comparator contract.
            return Integer.compare(vocabWord2.f658cn, vocabWord.f658cn);
        }
    }

    static {
        for (int i = 0; i < 1000; i++) {
            double[] dArr = expTable;
            dArr[i] = Math.exp((((i / 1000.0d) * 2.0d) - 1.0d) * 6.0d);
            dArr[i] = dArr[i] / (dArr[i] + 1.0d);
        }
    }

    /**
     * Creates a trainer that takes all of its settings from the given configuration.
     * The reference is stored as-is; no validation or copying is performed here.
     *
     * @param config the training configuration
     */
    public Word2VecTraining(Config config) {
        this.config = config;
    }

    /**
     * Advances a linear congruential generator by one step.
     *
     * <p>Computes {@code j * 25214903917 + 11} using ordinary (wrapping) {@code long} arithmetic.
     *
     * @param j the current generator state
     * @return the next generator state
     */
    static long nextRandom(long j) {
        final long multiplier = 25214903917L;
        final long increment = 11L;
        return increment + multiplier * j;
    }

    /**
     * Allocates a zero-filled {@code double} array whose length is {@code i} rounded up to the
     * next multiple of 128.
     *
     * <p>A length that is already a multiple of 128 (including 0) is used unchanged.
     *
     * @param i the minimum number of elements required
     * @return a new array of at least {@code i} elements
     */
    static double[] posixMemAlign128(int i) {
        final int remainder = i % 128;
        if (remainder <= 0) {
            // Already aligned (or not positive): allocate exactly what was asked for.
            return new double[i];
        }
        final int blocks = (i / 128) + 1;
        return new double[blocks * 128];
    }

    /**
     * Returns the configuration this trainer was constructed with.
     *
     * @return the same {@code Config} instance that was passed to the constructor
     */
    public Config getConfig() {
        return config;
    }

    /**
     * Allocates the network weight arrays and initialises them for training.
     *
     * <ul>
     *   <li>{@code syn0} is always allocated and filled with small pseudo-random values in
     *       {@code [-0.5, 0.5) / layer1Size}, generated by {@link #nextRandom(long)} from a
     *       fixed seed of 1, so the initial weights are the same on every run.</li>
     *   <li>{@code syn1} is allocated (all zeros) only when hierarchical softmax is enabled.</li>
     *   <li>{@code syn1neg} is allocated (all zeros) only when {@code config.getNegative() > 0}.</li>
     * </ul>
     * Finally {@code corpus.createBinaryTree()} is invoked.
     *
     * @param corpus the corpus providing the vocabulary size
     * @throws IllegalStateException if {@code vocabSize * layer1Size} does not fit in an
     *         {@code int} and therefore cannot be used as an array length
     */
    void initNet(Corpus corpus) {
        final int layer1Size = this.config.getLayer1Size();
        final int vocabSize = corpus.getVocabSize();

        // Multiply in long so an oversized model is reported clearly instead of silently
        // wrapping to a wrong (possibly negative) array length.
        final long requested = (long) vocabSize * layer1Size;
        if (requested > Integer.MAX_VALUE) {
            throw new IllegalStateException("Weight matrix too large: vocabSize=" + vocabSize
                    + ", layer1Size=" + layer1Size);
        }
        final int weightCount = (int) requested;

        syn0 = posixMemAlign128(weightCount);
        // Freshly allocated Java arrays are already zero-filled, so no explicit clearing
        // pass is needed for syn1 / syn1neg.
        if (this.config.useHierarchicalSoftmax()) {
            syn1 = posixMemAlign128(weightCount);
        }
        if (this.config.getNegative() > 0) {
            syn1neg = posixMemAlign128(weightCount);
        }

        long seed = 1;
        for (int word = 0; word < vocabSize; word++) {
            final int row = word * layer1Size;
            for (int col = 0; col < layer1Size; col++) {
                seed = nextRandom(seed);
                // Low 16 bits of the generator state mapped to [-0.5, 0.5), scaled by layer size.
                syn0[row + col] = (((seed & 65535) / 65536.0d) - 0.5d) / layer1Size;
            }
        }
        corpus.createBinaryTree();
    }

    /**
     * Builds the sampling table used for negative sampling.
     *
     * <p>The table has {@code TABLE_SIZE} slots, each holding a vocabulary index. Word
     * {@code w} receives a share of consecutive slots proportional to
     * {@code count(w)^0.75}, where the count is the entry's {@code f658cn} field, so picking a
     * uniformly random slot samples words from that smoothed distribution.
     *
     * <p>Differences from the decompiled original, all of them defect fixes:
     * <ul>
     *   <li>The normalising sum is accumulated as a {@code double}. The original truncated to
     *       {@code long} after every addition, underestimating the total so that the
     *       cumulative probability reached 1 early and the last words got too few slots.</li>
     *   <li>The vocabulary index is bounds-checked <em>before</em> {@code vocab[index]} is
     *       read; the original read one element past the vocabulary before clamping.</li>
     *   <li>An empty vocabulary leaves the table zero-filled instead of dereferencing
     *       {@code vocab[0]}.</li>
     * </ul>
     *
     * @param corpus the corpus providing the vocabulary and its size
     */
    void initUnigramTable(Corpus corpus) {
        final int vocabSize = corpus.getVocabSize();
        final VocabWord[] vocab = corpus.getVocab();
        this.table = new int[TABLE_SIZE];
        if (vocabSize <= 0) {
            // Nothing to distribute; every slot keeps the default index 0.
            return;
        }

        final double power = 0.75d;
        double normalizer = 0.0d;
        for (int i = 0; i < vocabSize; i++) {
            normalizer += Math.pow(vocab[i].f658cn, power);
        }

        final int lastWord = vocabSize - 1;
        int word = 0;
        // Cumulative probability mass of words 0..word.
        double cumulative = Math.pow(vocab[0].f658cn, power) / normalizer;
        for (int slot = 0; slot < TABLE_SIZE; slot++) {
            this.table[slot] = word;
            // Move to the next word once this slot's position passes the mass covered so far.
            // The last word absorbs any remaining slots (e.g. after rounding error).
            if (slot / (double) TABLE_SIZE > cumulative && word < lastWord) {
                word++;
                cumulative += Math.pow(vocab[word].f658cn, power) / normalizer;
            }
        }
    }

    /**
     * Runs the whole training pipeline and writes the resulting word vectors to the
     * configured output file.
     *
     * <p>Steps:
     * <ol>
     *   <li>Learn and sort the vocabulary from a {@code TextFileCorpus} built from the config.</li>
     *   <li>If no output file is configured, stop here.</li>
     *   <li>Initialise the network ({@link #initNet}) and, when negative sampling is enabled,
     *       the sampling table ({@link #initUnigramTable}).</li>
     *   <li>Start {@code config.getNumThreads()} worker threads and block on this object's
     *       monitor until {@code threadCount} is no longer positive. Presumably each worker
     *       decrements the counter and notifies when done; that code is not visible here.</li>
     *   <li>Write a header line {@code "<vocabSize> <layer1Size>"} followed by one line per word
     *       containing the word and its {@code layer1Size} vector components.</li>
     * </ol>
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the file always uses ASCII digits and
     * {@code '.'} as the decimal separator, regardless of the platform's default locale.
     *
     * <p>If the calling thread is interrupted while waiting for the workers, waiting continues
     * until they finish, and the thread's interrupt status is restored before this method
     * returns.
     *
     * @throws IOException if the corpus or the output file cannot be read, written or closed
     */
    public void trainModel() throws IOException {
        final int layer1Size = this.config.getLayer1Size();
        final TextFileCorpus textFileCorpus = new TextFileCorpus(this.config);
        Predefine.logger.info("learning vocabulary");
        textFileCorpus.learnVocab();
        Predefine.logger.info("sorting vocabulary");
        textFileCorpus.sortVocab();
        final int vocabSize = textFileCorpus.getVocabSize();
        final VocabWord[] vocab = textFileCorpus.getVocab();
        Predefine.logger.info("Vocab size: " + vocabSize);
        Predefine.logger.info("Words in train file: " + textFileCorpus.getTrainWords());
        if (this.config.getOutputFile() == null) {
            // NOTE(review): the corpus is neither shut down nor closed on this path, exactly as
            // in the original; confirm whether TextFileCorpus holds resources at this point.
            return;
        }

        initNet(textFileCorpus);
        if (this.config.getNegative() > 0) {
            initUnigramTable(textFileCorpus);
        }

        this.timeStart = System.currentTimeMillis();
        this.threadCount = this.config.getNumThreads();
        for (int i = 0; i < this.config.getNumThreads(); i++) {
            new TrainModelThread(this, new CacheCorpus(textFileCorpus), this.config, i).start();
        }
        textFileCorpus.shutdown();

        // Wait for every worker. An interrupt must not abort the wait (the workers are still
        // mutating the shared weights), so it is remembered and re-asserted at the very end.
        boolean interrupted = false;
        synchronized (this) {
            while (this.threadCount > 0) {
                try {
                    wait();
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        }

        System.err.println();
        Predefine.logger.info(String.format("finished training in %s", Utility.humanTime(System.currentTimeMillis() - this.timeStart)));

        // These are only needed during training; drop them before writing the output.
        syn1 = null;
        this.table = null;

        OutputStream fileOut = null;
        Writer encoder = null;
        PrintWriter out = null;
        try {
            fileOut = new FileOutputStream(this.config.getOutputFile());
            encoder = new OutputStreamWriter(fileOut, ENCODING);
            out = new PrintWriter(encoder);
            Predefine.logger.info("now saving the word vectors to the file " + this.config.getOutputFile());
            out.printf(Locale.ROOT, "%d %d\n", vocabSize, layer1Size);
            for (int w = 0; w < vocabSize; w++) {
                out.print(vocab[w].word);
                final int row = w * layer1Size;
                for (int c = 0; c < layer1Size; c++) {
                    out.printf(Locale.ROOT, " %f", syn0[row + c]);
                }
                out.println();
            }
            // PrintWriter never throws on write failure; surface it instead of silently
            // leaving a truncated model file behind.
            if (out.checkError()) {
                throw new IOException("Failed to write word vectors to " + this.config.getOutputFile());
            }
        } finally {
            try {
                textFileCorpus.close();
            } finally {
                // Always release the streams, even if closing the corpus failed.
                Utility.closeQuietly((Writer) out);
                Utility.closeQuietly(encoder);
                Utility.closeQuietly(fileOut);
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
}
