package com.hankcs.hanlp.mining.cluster;

import com.github.mikephil.charting.utils.Utils;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.classification.utilities.io.ConsoleLogger;
import com.hankcs.hanlp.classification.utilities.io.ILogger;
import com.hankcs.hanlp.collection.trie.datrie.MutableDoubleArrayTrieInteger;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.MathUtility;
import com.litesuits.orm.db.assit.SQLBuilder;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;

/* loaded from: classes3.dex */
public class ClusterAnalyzer<K> {
    /** Iteration cap for cluster refinement; its value (30) equals the pass limit applied in {@code refine_clusters}. */
    static final int NUM_REFINE_LOOP = 30;
    /** Every document added through {@code addDocument}, keyed by its caller-supplied id. */
    protected HashMap<K, Document<K>> documents_ = new HashMap<>();
    /** Segmenter used by {@code preprocess} to split raw text into terms. */
    protected Segment segment = HanLP.newSegment();
    /** Word-to-integer-id dictionary; {@code id} assigns new ids from the current size. */
    protected MutableDoubleArrayTrieInteger vocabulary = new MutableDoubleArrayTrieInteger();

    /**
     * Clusters a labelled corpus and scores the result with a weighted F-measure.
     *
     * <p>Every sub-directory of {@code str} is treated as one category and every entry inside
     * it as one document. Documents are registered under the id
     * {@code "<category><blank><file name>"}, clustered into as many clusters as there are
     * categories, and each category is credited with the best F-score it reaches against any
     * cluster. The returned value is the sum of those scores weighted by category size.
     *
     * <p>Plain files directly under the root are ignored: they neither count as categories nor
     * influence the number of clusters requested.
     *
     * @param str  path of the corpus root directory
     * @param str2 algorithm name; after stripping hyphens/whitespace and lower-casing,
     *             {@code "kmeans"} selects k-means, anything else selects repeated bisection
     * @return the weighted F-score, or {@code 1.0} when the root directory cannot be listed
     * @throws IllegalArgumentException if {@code str} is null, does not exist, or is not a directory
     */
    public static double evaluate(String str, String str2) {
        if (str == null) {
            throw new IllegalArgumentException("参数 folderPath == null");
        }
        File root = new File(str);
        if (!root.exists()) {
            throw new IllegalArgumentException(String.format("目录 %s 不存在", root.getAbsolutePath()));
        }
        if (!root.isDirectory()) {
            throw new IllegalArgumentException(String.format("目录 %s 不是一个目录", root.getAbsolutePath()));
        }
        // The key type is fixed to String here: a static method cannot refer to the class's K.
        ClusterAnalyzer<String> analyzer = new ClusterAnalyzer<>();
        File[] folders = root.listFiles();
        if (folders == null) {
            return 1.0d;
        }
        ConsoleLogger.logger.start("根目录:%s\n加载中...\n", str);
        // Sized for the worst case; only the first categoryCount slots are ever populated.
        int[] docsPerCategory = new int[folders.length];
        String[] categories = new String[folders.length];
        int categoryCount = 0;
        int docCount = 0;
        for (File folder : folders) {
            if (folder.isFile()) {
                continue;
            }
            File[] files = folder.listFiles();
            if (files == null) {
                continue;
            }
            String category = folder.getName();
            categories[categoryCount] = category;
            ConsoleLogger.logger.out("[%s]...", category);
            int total = files.length;
            // Progress is reported roughly every 1/10000th of the category; never 0 when total > 0.
            int logEvery = (int) Math.ceil(total / 10000.0f);
            for (int i = 0; i < total; i++) {
                String docId = category + SQLBuilder.BLANK + files[i].getName();
                analyzer.addDocument(docId, IOUtil.readTxt(files[i].getAbsolutePath()));
                if (i % logEvery == 0) {
                    ConsoleLogger.logger.out("%c[%s]...%.2f%%", 13, category, Double.valueOf(MathUtility.percentage(i + 1, total)));
                }
                docCount++;
                docsPerCategory[categoryCount]++;
            }
            ConsoleLogger.logger.out(" %d 篇文档\n", Integer.valueOf(total));
            categoryCount++;
        }
        ConsoleLogger.logger.finish(" 加载了 %d 个类目,共 %d 篇文档\n", Integer.valueOf(categoryCount), Integer.valueOf(docCount));
        ConsoleLogger.logger.start(str2 + "聚类中...");
        boolean useKMeans = str2.replaceAll("[-\\s]", "").toLowerCase().equals("kmeans");
        List<Set<String>> clusters = useKMeans
                ? analyzer.kmeans(categoryCount)
                : analyzer.repeatedBisection(categoryCount);
        ConsoleLogger.logger.finish(" 完毕。\n");

        double[] bestF = new double[categoryCount];
        for (int c = 0; c < categoryCount; c++) {
            // Match on "<category><blank>" so that category "a" does not claim documents of "ab".
            String idPrefix = categories[c] + SQLBuilder.BLANK;
            for (Set<String> cluster : clusters) {
                int hits = 0;
                for (String docId : cluster) {
                    if (docId.startsWith(idPrefix)) {
                        hits++;
                    }
                }
                if (hits == 0) {
                    continue;
                }
                double precision = hits / (double) cluster.size();
                double recall = hits / (double) docsPerCategory[c];
                bestF[c] = Math.max(bestF[c], ((2.0d * precision) * recall) / (precision + recall));
            }
        }
        double weightedF = 0.0d;
        for (int c = 0; c < categoryCount; c++) {
            weightedF += (bestF[c] * docsPerCategory[c]) / docCount;
        }
        return weightedF;
    }

    /**
     * Converts clusters into plain sets of document ids.
     *
     * @param list the clusters to convert
     * @return one id set per cluster, in the same order as {@code list}
     */
    private List<Set<K>> toResult(List<Cluster<K>> list) {
        List<Set<K>> result = new ArrayList<>(list.size());
        for (Cluster<K> cluster : list) {
            Set<K> ids = new HashSet<>();
            for (Document<K> document : cluster.documents_) {
                ids.add(document.id_);
            }
            result.add(ids);
        }
        return result;
    }

    /**
     * Adds a raw-text document: the text is run through {@link #preprocess(String)} and the
     * resulting word list is handed to {@link #addDocument(Object, List)}.
     *
     * @param k   id under which the document is stored
     * @param str raw document text
     * @return whatever the word-list overload returns for this id
     */
    public Document<K> addDocument(K k, String str) {
        // Pass the id through unchanged; the second argument's type already selects the overload.
        return addDocument(k, preprocess(str));
    }

    /**
     * Adds a pre-tokenized document, replacing any document already stored under the same id.
     *
     * @param k    id under which the document is stored
     * @param list the document's words
     * @return the value returned by {@code HashMap.put}: the document previously stored under
     *         {@code k}, or {@code null} if there was none
     */
    public Document<K> addDocument(K k, List<String> list) {
        SparseVector features = toVector(list);
        Document<K> document = new Document<>(k, features);
        return this.documents_.put(k, document);
    }

    /**
     * Looks up the integer id of a word, registering the word first if it is unknown.
     *
     * @param str the word
     * @return the id already stored for the word, or the vocabulary size at the time of
     *         insertion when the lookup yields {@code -1}
     */
    protected int id(String str) {
        int known = this.vocabulary.get(str);
        if (known == -1) {
            // Not registered yet: the next free id is the current vocabulary size.
            int fresh = this.vocabulary.size();
            this.vocabulary.put(str, fresh);
            return fresh;
        }
        return known;
    }

    /**
     * Clusters all added documents into {@code i} groups: the documents are gathered into one
     * cluster, split into {@code i} sections, and the sections are then refined.
     *
     * @param i number of clusters to produce
     * @return one set of document ids per resulting cluster
     */
    public List<Set<K>> kmeans(int i) {
        // Typed (not raw) so that sectioned_clusters() keeps its element type.
        Cluster<K> root = new Cluster<>();
        for (Document<K> document : this.documents_.values()) {
            root.add_document(document);
        }
        root.section(i);
        refine_clusters(root.sectioned_clusters());
        List<Cluster<K>> refined = new ArrayList<>(i);
        for (Cluster<K> part : root.sectioned_clusters()) {
            part.refresh();
            refined.add(part);
        }
        return toResult(refined);
    }

    /**
     * Segments raw text and filters the resulting terms.
     *
     * <p>A term is removed when its word is in the core stop-word dictionary or when its
     * nature starts with {@code "w"} (presumably the punctuation tags; confirm against the
     * tag set in use).
     *
     * @param str raw document text
     * @return the words of the remaining terms, in their original order
     */
    protected List<String> preprocess(String str) {
        List<Term> terms = this.segment.seg(str);
        for (ListIterator<Term> cursor = terms.listIterator(); cursor.hasNext(); ) {
            Term term = cursor.next();
            boolean stopWord = CoreStopWordDictionary.contains(term.word);
            if (stopWord || term.nature.startsWith("w")) {
                cursor.remove();
            }
        }
        List<String> words = new ArrayList<>(terms.size());
        for (Term term : terms) {
            words.add(term.word);
        }
        return words;
    }

    /**
     * Iteratively moves documents between clusters while doing so increases the summed norm
     * of the clusters' composite vectors.
     *
     * <p>Each pass visits every (cluster, document) slot once in random order. For a document,
     * the norm its own cluster would have without it and the norm every other cluster would
     * have with it are computed; the move with the largest positive total gain is applied.
     * Passes stop after {@link #NUM_REFINE_LOOP} iterations or as soon as a pass moves nothing.
     *
     * @param list the clusters to refine; modified in place
     * @return the accumulated gain of all moves that were applied
     */
    double refine_clusters(List<Cluster<K>> list) {
        double[] norms = new double[list.size()];
        int slot = 0;
        for (Cluster<K> cluster : list) {
            norms[slot++] = cluster.composite_vector().norm();
        }

        double totalGain = 0.0d;
        for (int pass = 0; pass < NUM_REFINE_LOOP; pass++) {
            // Snapshot every {cluster index, document index} pair before anything moves.
            List<int[]> items = new ArrayList<>(this.documents_.size());
            for (int c = 0; c < list.size(); c++) {
                for (int d = 0; d < list.get(c).documents().size(); d++) {
                    items.add(new int[] {c, d});
                }
            }
            Collections.shuffle(items);

            boolean changed = false;
            for (int[] item : items) {
                int sourceIndex = item[0];
                int docIndex = item[1];
                Cluster<K> source = list.get(sourceIndex);
                Document<K> document = source.documents().get(docIndex);

                // Norm of the source cluster once the document is taken out.
                double sourceSquared = Math.pow(norms[sourceIndex], 2.0d)
                        + refined_vector_value(source.composite_vector(), document.feature(), -1);
                double sourceNormAfter = sourceSquared > 0.0d ? Math.sqrt(sourceSquared) : 0.0d;

                double bestGain = -1.0d;
                double bestTargetNorm = 0.0d;
                int bestTarget = 0;
                for (int t = 0; t < list.size(); t++) {
                    if (t == sourceIndex) {
                        continue;
                    }
                    // Norm of the candidate cluster once the document is put in.
                    double targetSquared = Math.pow(norms[t], 2.0d)
                            + refined_vector_value(list.get(t).composite_vector(), document.feature(), 1);
                    double targetNormAfter = targetSquared > 0.0d ? Math.sqrt(targetSquared) : 0.0d;
                    double gain = ((sourceNormAfter + targetNormAfter) - norms[sourceIndex]) - norms[t];
                    if (bestGain < gain) {
                        bestGain = gain;
                        bestTargetNorm = targetNormAfter;
                        bestTarget = t;
                    }
                }

                if (bestGain > 0.0d) {
                    totalGain += bestGain;
                    list.get(bestTarget).add_document(document);
                    list.get(sourceIndex).remove_document(docIndex);
                    norms[sourceIndex] = sourceNormAfter;
                    norms[bestTarget] = bestTargetNorm;
                    changed = true;
                }
            }

            if (!changed) {
                break;
            }
            for (Cluster<K> cluster : list) {
                cluster.refresh();
            }
        }
        return totalGain;
    }

    /**
     * Computes how much a composite vector's squared norm changes when a document vector is
     * added to it ({@code i = 1}) or removed from it ({@code i = -1}).
     *
     * <p>For every entry {@code v} of the document vector and the matching composite entry
     * {@code c}, the contribution is {@code v^2 + i * 2 * c * v}.
     *
     * @param sparseVector  the cluster's composite vector
     * @param sparseVector2 the document's feature vector
     * @param i             sign multiplier for the cross term; callers pass {@code 1} or {@code -1}
     * @return the sum of the per-entry contributions
     */
    double refined_vector_value(SparseVector sparseVector, SparseVector sparseVector2, int i) {
        // Start from exactly zero so an empty document vector contributes nothing.
        double sum = 0.0d;
        for (Map.Entry<Integer, Double> entry : sparseVector2.entrySet()) {
            sum += Math.pow(entry.getValue().doubleValue(), 2.0d) + (i * 2 * sparseVector.get((Object) entry.getKey()).doubleValue() * entry.getValue().doubleValue());
        }
        return sum;
    }

    /**
     * Runs repeated bisection bounded only by a gain threshold, with no cap on the number of
     * clusters.
     *
     * @param d gain threshold forwarded to {@link #repeatedBisection(int, double)}
     * @return one set of document ids per resulting cluster
     */
    public List<Set<K>> repeatedBisection(double d) {
        // A non-positive cluster count disables the size limit in the two-argument overload.
        final int noClusterLimit = 0;
        return repeatedBisection(noClusterLimit, d);
    }

    /**
     * Runs repeated bisection until {@code i} clusters exist, without any gain threshold.
     *
     * @param i target number of clusters
     * @return one set of document ids per resulting cluster
     */
    public List<Set<K>> repeatedBisection(int i) {
        // Exactly zero means "no threshold": even a zero-gain split is never below it.
        return repeatedBisection(i, 0.0d);
    }

    /**
     * Clusters all added documents by repeatedly bisecting the head of a priority queue.
     *
     * <p>All documents start in one cluster, which is split in two and refined. While the
     * queue is non-empty and (when {@code i > 0}) holds fewer than {@code i} clusters, the
     * head cluster is replaced by its two sections. Splitting stops early when the head has
     * no sections or, when {@code d > 0}, when its sectioned gain is below {@code d}.
     *
     * @param i target number of clusters; a value {@code <= 0} removes the limit
     * @param d minimum sectioned gain for a split to be accepted; {@code 0} disables the check
     * @return one set of document ids per resulting cluster, in reverse order of queue removal
     */
    public List<Set<K>> repeatedBisection(int i, double d) {
        // Typed (not raw) so that sectioned_clusters() keeps its element type.
        Cluster<K> cluster = new Cluster<>();
        List<Cluster<K>> result = new ArrayList<>(i > 0 ? i : 16);
        for (Document<K> document : this.documents_.values()) {
            cluster.add_document(document);
        }

        PriorityQueue<Cluster<K>> queue = new PriorityQueue<>();
        cluster.section(2);
        refine_clusters(cluster.sectioned_clusters());
        cluster.set_sectioned_gain();
        cluster.composite_vector().clear();
        queue.add(cluster);

        while (!queue.isEmpty()) {
            if (i > 0 && queue.size() >= i) {
                break;
            }
            cluster = queue.peek();
            if (cluster.sectioned_clusters().size() < 1) {
                break;
            }
            if (d > 0.0d && cluster.sectioned_gain() < d) {
                break;
            }
            queue.poll();
            for (Cluster<K> section : cluster.sectioned_clusters()) {
                if (section.size() >= 2) {
                    section.section(2);
                    refine_clusters(section.sectioned_clusters());
                    section.set_sectioned_gain();
                    if (section.sectioned_gain() < d) {
                        // Not worth splitting further: discard the tentative sub-sections.
                        for (Cluster<K> sub : section.sectioned_clusters()) {
                            sub.clear();
                        }
                    }
                    section.composite_vector().clear();
                }
                queue.add(section);
            }
        }

        // Drain in queue order, then reverse once; same order as prepending each element.
        while (!queue.isEmpty()) {
            result.add(queue.poll());
        }
        Collections.reverse(result);
        return toResult(result);
    }

    /**
     * Builds a term-count vector from a word list: each word is mapped to its integer id and
     * the entry for that id is incremented by one per occurrence.
     *
     * @param list the document's words
     * @return a sparse vector holding the count of each word id
     */
    protected SparseVector toVector(List<String> list) {
        SparseVector counts = new SparseVector();
        for (String word : list) {
            Integer wordId = Integer.valueOf(id(word));
            Double seen = counts.get((Object) wordId);
            // A null lookup is treated as "first occurrence".
            double updated = (seen == null) ? 1.0d : seen.doubleValue() + 1.0d;
            counts.put(wordId, Double.valueOf(updated));
        }
        return counts;
    }
}
