package cn.edu.hfut.dmic.webcollector.fetcher;

import cn.edu.hfut.dmic.webcollector.crawldb.DBManager;
import cn.edu.hfut.dmic.webcollector.crawldb.Generator;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.net.Requester;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.zlog.Logger;
import org.zlog.LoggerFactory;

/* loaded from: classes.dex */
public class Fetcher {
    // NOTE(review): visit() compares page.getStatus() against the literal 2 on its
    // "visited" path and 1 on its "failed" path, i.e. the reverse of these two values.
    // Those literals presumably come from a different set of constants that the
    // compiler inlined; confirm before substituting FETCH_* for them.
    public static final int FETCH_FAILED = 2;
    public static final int FETCH_SUCCESS = 1;
    public static final Logger LOG = LoggerFactory.getLogger(Fetcher.class);

    // Incremented when a FetcherThread enters run(). Not assigned anywhere in the
    // decompiled code that survived; presumably created in fetchAll() - TODO confirm.
    private AtomicInteger activeThreads;
    public DBManager dbManager;
    private QueueFeeder feeder;
    private FetchQueue fetchQueue;
    // Set to System.currentTimeMillis() by a worker right before it fetches an item.
    private AtomicLong lastRequestStart;
    public Requester requester;
    // Stop flag: written by stop() and polled by every FetcherThread at the top of its
    // loop. It is volatile so that a write made by the stopping thread is guaranteed
    // to become visible to the worker threads.
    volatile boolean running;
    // Incremented/decremented by workers around their idle sleep.
    private AtomicInteger spinWaiting;
    public Visitor visitor;
    // Number of additional attempts getPage() makes after the first request fails.
    private int retry = 3;
    // Pause between retry attempts, in milliseconds; 0 disables the pause.
    private long retryInterval = 0;
    // Pause a worker takes after handling each item, in milliseconds; 0 disables it.
    private long visitInterval = 0;
    // Only read/written through getThreads()/setThreads() in the visible code;
    // presumably the worker-thread count used by fetchAll() - TODO confirm.
    private int threads = 50;
    // Only read/written through its accessor pair in the visible code.
    private boolean isContentStored = false;

    /* loaded from: classes.dex */
    /**
     * Queue entry wrapping the {@link CrawlDatum} that a worker thread should fetch.
     */
    public static class FetchItem {
        /** The task carried by this queue entry. */
        public CrawlDatum datum;

        /**
         * Creates an entry for the given task.
         *
         * @param datum the task to wrap; stored as-is without validation
         */
        public FetchItem(CrawlDatum datum) {
            this.datum = datum;
        }
    }

    /* loaded from: classes.dex */
    /**
     * FIFO buffer of pending {@link FetchItem}s: {@link #addFetchItem} appends to the
     * tail and {@link #getFetchItem} removes from the head.
     *
     * <p>The backing list is wrapped with {@code Collections.synchronizedList}, and the
     * add/get/dump methods additionally synchronize on this object.
     */
    public static class FetchQueue {
        /**
         * Count of items ever added through {@link #addFetchItem}. It is not decremented
         * when items are taken and is not reset by {@link #clear()}.
         */
        public AtomicInteger totalSize = new AtomicInteger(0);

        /** Pending items, head first. */
        public final List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());

        /**
         * Appends an item to the tail of the queue and bumps {@link #totalSize}.
         *
         * @param fetchItem the item to enqueue; {@code null} is silently ignored
         */
        public synchronized void addFetchItem(FetchItem fetchItem) {
            if (fetchItem != null) {
                this.queue.add(fetchItem);
                this.totalSize.incrementAndGet();
            }
        }

        /** Removes every pending item; {@link #totalSize} is left untouched. */
        public void clear() {
            this.queue.clear();
        }

        /**
         * Logs the URL of every pending item, one line per item, prefixed by its position.
         */
        public synchronized void dump() {
            // Take an atomic snapshot instead of indexing the live list: get(i) on a
            // linked list is O(i), and clear() (which does not hold this object's lock)
            // could shrink the list between size() and get(i).
            FetchItem[] snapshot = this.queue.toArray(new FetchItem[0]);
            for (int i = 0; i < snapshot.length; i++) {
                Fetcher.LOG.info("  " + i + ". " + snapshot[i].datum.getUrl());
            }
        }

        /**
         * Removes and returns the item at the head of the queue.
         *
         * @return the oldest pending item, or {@code null} when the queue is empty
         */
        public synchronized FetchItem getFetchItem() {
            return this.queue.isEmpty() ? null : this.queue.remove(0);
        }

        /**
         * @return the number of items currently pending
         */
        public int getSize() {
            return this.queue.size();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes.dex */
    /**
     * Worker thread: repeatedly takes an item from the fetch queue, downloads it via
     * {@link Fetcher#getPage}, hands it to {@link Fetcher#visit} and, when the visit
     * succeeded, writes the results through the {@link DBManager}.
     *
     * <p>The thread ends when {@link Fetcher#running} becomes {@code false}, or when the
     * queue is empty and the feeder thread is no longer alive.
     */
    public class FetcherThread extends Thread {
        private FetcherThread() {
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            Fetcher.this.activeThreads.incrementAndGet();
            // The try/finally wraps the whole loop so the counter is decremented exactly
            // once per thread, matching the single increment above. Decrementing inside
            // the loop would drive the counter negative after the first iteration.
            try {
                while (Fetcher.this.running) {
                    try {
                        FetchItem fetchItem = Fetcher.this.fetchQueue.getFetchItem();
                        if (fetchItem == null) {
                            boolean feederDone = !Fetcher.this.feeder.isAlive();
                            if (feederDone && Fetcher.this.fetchQueue.getSize() <= 0) {
                                // Nothing queued and nothing more will arrive.
                                return;
                            }
                            waitForWork();
                            continue;
                        }
                        process(fetchItem.datum);
                    } catch (Exception e) {
                        // A failure on one item must not terminate the worker.
                        Fetcher.LOG.info("Exception", e);
                    }
                }
            } catch (Exception e) {
                Fetcher.LOG.info("Exception", e);
            } finally {
                Fetcher.this.activeThreads.decrementAndGet();
            }
        }

        /** Fetches, visits and persists one task, then honours the visit interval. */
        private void process(CrawlDatum crawlDatum) {
            Fetcher.this.lastRequestStart.set(System.currentTimeMillis());
            Page page = Fetcher.this.getPage(crawlDatum);
            crawlDatum.incrRetry(page.getRetry());
            crawlDatum.setFetchTime(System.currentTimeMillis());
            CrawlDatums crawlDatums = new CrawlDatums();
            if (Fetcher.this.visit(crawlDatum, page, crawlDatums)) {
                persist(crawlDatum, page, crawlDatums);
            }
            if (Fetcher.this.visitInterval > 0) {
                try {
                    Thread.sleep(Fetcher.this.visitInterval);
                } catch (InterruptedException ignored) {
                    // Best effort pause. The interrupt is deliberately not re-asserted:
                    // the loop is controlled by Fetcher.running, and a pending interrupt
                    // would make every later sleep return immediately.
                }
            }
        }

        /** Writes the fetch result and any follow-up tasks; DB errors are logged, not thrown. */
        private void persist(CrawlDatum crawlDatum, Page page, CrawlDatums crawlDatums) {
            try {
                Fetcher.this.dbManager.wrtieFetchSegment(crawlDatum);
                if (page.getResponse() == null) {
                    return;
                }
                if (page.getResponse().isRedirect() && page.getResponse().getRealUrl() != null) {
                    Fetcher.this.dbManager.writeRedirectSegment(crawlDatum, page.getResponse().getRealUrl().toString());
                }
                if (!crawlDatums.isEmpty()) {
                    Fetcher.this.dbManager.wrtieParseSegment(crawlDatums);
                }
            } catch (Exception e) {
                Fetcher.LOG.info("Exception when updating db", e);
            }
        }

        /** Sleeps 500 ms while the queue is empty, tracking the wait in spinWaiting. */
        private void waitForWork() {
            Fetcher.this.spinWaiting.incrementAndGet();
            try {
                Thread.sleep(500L);
            } catch (InterruptedException ignored) {
                // Best effort pause; see process() for why the interrupt is not restored.
            }
            Fetcher.this.spinWaiting.decrementAndGet();
        }
    }

    /* loaded from: classes.dex */
    /**
     * Producer thread that keeps a {@link FetchQueue} topped up to {@link #size} items
     * with tasks pulled from a {@link Generator}. It finishes when the generator returns
     * {@code null} or when {@link #stopFeeder()} clears {@link #running}.
     */
    public static class QueueFeeder extends Thread {
        public Generator generator;
        public FetchQueue queue;
        /**
         * Stop flag. Written by {@link #stopFeeder()} on the caller's thread and polled by
         * {@link #run()} on the feeder thread, hence volatile so the write becomes visible.
         */
        public volatile boolean running = true;
        /** Target number of items to keep in the queue. */
        public int size;

        /**
         * @param fetchQueue the queue to fill
         * @param generator  source of tasks; a {@code null} result from {@code next()} ends the feeder
         * @param i          target queue size
         */
        public QueueFeeder(FetchQueue fetchQueue, Generator generator, int i) {
            this.queue = fetchQueue;
            this.generator = generator;
            this.size = i;
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            boolean hasMore = true;
            this.running = true;
            while (hasMore && this.running) {
                int free = this.size - this.queue.getSize();
                if (free <= 0) {
                    // Queue is full: back off before checking again.
                    try {
                        Thread.sleep(1000L);
                    } catch (InterruptedException ignored) {
                        // Best effort pause; the loop is controlled by the running flag.
                    }
                    continue;
                }
                while (free > 0 && hasMore && this.running) {
                    CrawlDatum next = this.generator.next();
                    hasMore = next != null;
                    if (hasMore) {
                        this.queue.addFetchItem(new FetchItem(next));
                        free--;
                    }
                }
            }
        }

        /**
         * Asks the feeder to stop and blocks until its thread has ended, logging once per
         * second while waiting. If the calling thread is interrupted during the wait, the
         * wait continues and the interrupt status is restored before returning.
         */
        public void stopFeeder() {
            this.running = false;
            boolean interrupted = false;
            while (isAlive()) {
                try {
                    Thread.sleep(1000L);
                    Fetcher.LOG.info("stopping feeder......");
                } catch (InterruptedException e) {
                    // Remember the interrupt instead of dropping it. Re-asserting it now
                    // would make the next sleep throw immediately and spin this loop.
                    interrupted = true;
                }
            }
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /* JADX WARN: Code restructure failed: missing block: B:47:0x019b, code lost:
    
        cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.LOG.info("kill threads");
        r2 = 0;
     */
    /* JADX WARN: Code restructure failed: missing block: B:49:0x01a4, code lost:
    
        if (r2 >= r1.length) goto L79;
     */
    /* JADX WARN: Code restructure failed: missing block: B:51:0x01ac, code lost:
    
        if (r1[r2].isAlive() == false) goto L81;
     */
    /* JADX WARN: Code restructure failed: missing block: B:53:0x01ae, code lost:
    
        r1[r2].stop();
        cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.LOG.info("kill thread " + r2);
     */
    /* JADX WARN: Code restructure failed: missing block: B:57:0x01dc, code lost:
    
        r0 = move-exception;
     */
    /* JADX WARN: Code restructure failed: missing block: B:58:0x01dd, code lost:
    
        cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.LOG.info("Exception", r0);
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Decompiler placeholder: the original body of this method could not be recovered,
     * so in this file it unconditionally throws {@link UnsupportedOperationException}.
     *
     * <p>NOTE(review): the JADX fragments preserved in the comment block above this
     * method mention logging "kill threads" and calling {@code stop()} on still-alive
     * entries of an array; presumably the real method starts the feeder and worker
     * threads and tears them down at the end - confirm against the bytecode or the
     * upstream sources before relying on that.
     *
     * @param r11 the task generator (unused by this stub)
     * @throws java.lang.Exception declared by the original signature
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    public void fetchAll(cn.edu.hfut.dmic.webcollector.crawldb.Generator r11) throws java.lang.Exception {
        /*
            Method dump skipped, instructions count: 680
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.fetchAll(cn.edu.hfut.dmic.webcollector.crawldb.Generator):void");
    }

    /**
     * @return the {@link DBManager} the worker threads write their results to
     */
    public DBManager getDBManager() {
        return dbManager;
    }

    /**
     * Downloads the page for a task, retrying on failure.
     *
     * <p>Reconstructed from the register-level dump JADX emitted for this method, which
     * previously only threw {@link UnsupportedOperationException}. The request is
     * attempted up to {@code retry + 1} times; every attempt that throws is logged and,
     * when {@code retryInterval > 0}, followed by a pause of that many milliseconds
     * before the next attempt.
     *
     * @param crawlDatum the task to fetch
     * @return a success page when the requester returned a response (including a
     *         "not found" response), otherwise a failed page carrying the last exception
     */
    public Page getPage(CrawlDatum crawlDatum) {
        String url = crawlDatum.getUrl();
        cn.edu.hfut.dmic.webcollector.net.HttpResponse response = null;
        Exception lastException = null;
        // Number of retries actually scheduled; passed on to the resulting Page.
        int retryCount = 0;

        for (int attempt = 0; attempt <= this.retry; attempt++) {
            try {
                response = this.requester.getResponse(crawlDatum);
                break;
            } catch (Exception ex) {
                lastException = ex;
                if (attempt < this.retry) {
                    LOG.info("retry " + (attempt + 1) + ordinalSuffix(attempt + 1) + "URL:" + url
                            + " after " + this.retryInterval + "ms " + "(" + ex.toString() + ")");
                    retryCount++;
                    if (this.retryInterval > 0) {
                        try {
                            Thread.sleep(this.retryInterval);
                        } catch (Exception ignored) {
                            // Best effort pause, as in the original bytecode: a failed or
                            // interrupted sleep just moves on to the next attempt.
                        }
                    }
                }
            }
        }

        if (response == null) {
            LOG.info("failed URL: " + url + " (" + lastException + ")");
            return Page.createFailedPage(crawlDatum, retryCount, lastException);
        }
        if (response.isNotFound()) {
            LOG.info("ignore URL: " + url + " (not found)");
        } else {
            LOG.info("fetch URL: " + url);
        }
        return Page.createSuccessPage(crawlDatum, retryCount, response);
    }

    /** Suffix used in the retry log line: "st ", "nd ", "rd " for 1-3, otherwise "th ". */
    private static String ordinalSuffix(int number) {
        switch (number) {
            case 1:
                return "st ";
            case 2:
                return "nd ";
            case 3:
                return "rd ";
            default:
                return "th ";
        }
    }

    /**
     * @return the {@link Requester} currently configured on this fetcher
     */
    public Requester getRequester() {
        return requester;
    }

    /**
     * @return the configured retry count
     */
    public int getRetry() {
        return retry;
    }

    /**
     * @return the configured retry interval
     */
    public long getRetryInterval() {
        return retryInterval;
    }

    /**
     * @return the configured value of the {@code threads} setting
     */
    public int getThreads() {
        return threads;
    }

    /**
     * @return the pause, in milliseconds, a worker takes after handling each item
     */
    public long getVisitInterval() {
        return visitInterval;
    }

    /**
     * @return the {@link Visitor} whose callbacks {@link #visit} invokes
     */
    public Visitor getVisitor() {
        return visitor;
    }

    /**
     * @return the current value of the {@code isContentStored} flag
     */
    public boolean isIsContentStored() {
        return isContentStored;
    }

    /**
     * @param dbManager the {@link DBManager} the worker threads should write results to
     */
    public void setDBManager(DBManager dbManager) {
        this.dbManager = dbManager;
    }

    /**
     * @param contentStored the new value of the {@code isContentStored} flag
     */
    public void setIsContentStored(boolean contentStored) {
        isContentStored = contentStored;
    }

    /**
     * @param newRequester the {@link Requester} to store on this fetcher
     */
    public void setRequester(Requester newRequester) {
        requester = newRequester;
    }

    /**
     * @param retryCount the retry count to configure; stored without validation
     */
    public void setRetry(int retryCount) {
        retry = retryCount;
    }

    /**
     * @param interval the retry interval to configure; stored without validation
     */
    public void setRetryInterval(long interval) {
        retryInterval = interval;
    }

    /**
     * @param threadCount the new value of the {@code threads} setting; stored without validation
     */
    public void setThreads(int threadCount) {
        threads = threadCount;
    }

    /**
     * @param interval pause, in milliseconds, a worker takes after handling each item;
     *                 values of 0 or less disable the pause
     */
    public void setVisitInterval(long interval) {
        visitInterval = interval;
    }

    /**
     * @param newVisitor the {@link Visitor} whose callbacks {@link #visit} should invoke
     */
    public void setVisitor(Visitor newVisitor) {
        visitor = newVisitor;
    }

    /**
     * Clears the {@code running} flag, which each {@link FetcherThread} checks at the
     * top of its loop. This method does not wait for the threads to finish.
     */
    public void stop() {
        running = false;
    }

    /**
     * Dispatches a fetched page to the matching {@link Visitor} callbacks and updates
     * the task's status accordingly.
     *
     * <ul>
     *   <li>Page status 2: the task is marked with status 1 and the response code,
     *       then {@code notFound} or {@code visit} is invoked (depending on the
     *       response), followed by {@code afterVisit}.</li>
     *   <li>Page status 1: the task is marked with status 0 and {@code fail} is invoked.</li>
     *   <li>Any other page status: nothing happens.</li>
     * </ul>
     *
     * <p>NOTE(review): the numeric literals are kept exactly as decompiled; they do not
     * line up with {@code FETCH_SUCCESS}/{@code FETCH_FAILED} in this class and
     * presumably mirror constants declared elsewhere - confirm before naming them.
     *
     * @param crawlDatum  the task that was fetched; its status is updated in place
     * @param page        the result of the fetch
     * @param crawlDatums collector handed to the visitor callbacks
     * @return {@code false} if any visitor callback threw (the exception is logged),
     *         {@code true} otherwise
     */
    public boolean visit(CrawlDatum crawlDatum, Page page, CrawlDatums crawlDatums) {
        final String url = crawlDatum.getUrl();
        final int pageStatus = page.getStatus();

        if (pageStatus == 1) {
            crawlDatum.setStatus(0);
            try {
                visitor.fail(page, crawlDatums);
            } catch (Exception failError) {
                LOG.info("Exception when execute failed URL: " + url, failError);
                return false;
            }
            return true;
        }

        if (pageStatus != 2) {
            return true;
        }

        crawlDatum.setStatus(1);
        crawlDatum.setHttpCode(page.getResponse().getCode());
        // Evaluated outside the try so that a failure here propagates to the caller
        // instead of being reported as a visitor error.
        final boolean missing = page.getResponse().isNotFound();
        try {
            if (missing) {
                visitor.notFound(page, crawlDatums);
            } else {
                visitor.visit(page, crawlDatums);
            }
        } catch (Exception callbackError) {
            final String prefix = missing ? "Exception when not found URL: " : "Exception when visit URL: ";
            LOG.info(prefix + url, callbackError);
            return false;
        }

        try {
            visitor.afterVisit(page, crawlDatums);
        } catch (Exception afterError) {
            LOG.info("Exception after visit URL: " + url, afterError);
            return false;
        }
        return true;
    }
}
