/*
 * Decompiled with CFR 0.152.
 */
package websphinx;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import rcm.util.PriorityQueue;
import websphinx.Action;
import websphinx.Classifier;
import websphinx.CrawlEvent;
import websphinx.CrawlListener;
import websphinx.CrawlTimer;
import websphinx.DownloadParameters;
import websphinx.EventLog;
import websphinx.Link;
import websphinx.LinkEvent;
import websphinx.LinkListener;
import websphinx.LinkPredicate;
import websphinx.Page;
import websphinx.PagePredicate;
import websphinx.RobotExclusion;
import websphinx.StandardClassifier;
import websphinx.Worm;
import websphinx.WormTimer;

public class Crawler
implements Runnable,
Serializable {
    // Serialization version identifier for this class.
    private static final long serialVersionUID = -3757789861952010450L;
    // Predefined label sets used as values of the "domain" setting.
    // All six constants below are assigned in the static initializer at the end of the class.
    public static final String[] WEB;
    public static final String[] SERVER;
    public static final String[] SUBTREE;
    // Predefined label sets used as values of the "type" (link type) setting.
    public static final String[] HYPERLINKS;
    public static final String[] HYPERLINKS_AND_IMAGES;
    public static final String[] ALL_LINKS;
    // Crawler name; defaults to the runtime class name and seeds the user agent of dp below.
    private String name = this.getClass().getName();
    // Starting links. Transient: writeObject()/readObject() persist them as URL strings in rootHrefs.
    private transient Link[] roots = null;
    // Serialized stand-in for roots; only populated while writeObject() runs.
    private String[] rootHrefs = null;
    // Labels a link must carry (any of them) to be followed; null disables the check in expand().
    private String[] domain = WEB;
    // When true, run() calls process() itself in queue order instead of each worm doing it.
    private boolean synchronous = false;
    // Selects the sign of the priorities assigned in expand().
    private boolean depthFirst = true;
    // Link-type labels a link must carry (any of them); null disables the check in expand().
    private String[] type = HYPERLINKS;
    // When true, expand() does not resubmit links whose page URL was already marked visited.
    private boolean ignoreVisitedLinks = true;
    // Links found on pages at this depth or deeper are not followed.
    private int maxDepth = 5;
    // Default download parameters handed to every link in expand().
    private DownloadParameters dp = new DownloadParameters().changeUserAgent(this.name);
    // Classifiers applied to every page in process(), kept ordered by ascending priority.
    private Vector classifiers = new Vector();
    // Optional user-supplied filters and action; each is notified via connected()/disconnected().
    private LinkPredicate linkPredicate;
    private PagePredicate pagePredicate;
    private Action action;
    // Roots of the crawl currently (or most recently) run; set in run(), cleared in clear().
    private transient Link[] crawledRoots = null;
    // Crawl state code, matching the dispatch in sendCrawlEvent():
    // 0 = started, 1 = stopped, 2 = cleared, 3 = timed out, 4 = paused.
    private transient int state = 2;
    // Worker threads; non-null only while run() is executing.
    private transient Worm[] worms;
    // Links waiting to be downloaded by a worm; also used as a monitor.
    private transient PriorityQueue fetchQueue;
    // Links submitted but not yet fully processed; also the monitor run() waits on.
    private transient PriorityQueue crawlQueue;
    // Counters exposed through getLinksTested(), getPagesVisited() and getPagesLeft().
    private transient int numLinksTested;
    private transient int numPagesVisited;
    private transient int numPagesLeft;
    // Registered CrawlListener and LinkListener instances.
    private transient Vector crawlListeners;
    private transient Vector linkListeners;
    // Page URLs (as strings) already submitted; only the keys are consulted.
    private transient Hashtable visitedPages;
    // robots.txt checker consulted in fetch() when the download parameters ask for it.
    private transient RobotExclusion robotExclusion;

    /**
     * Creates a crawler that starts out with a {@link StandardClassifier}
     * registered and with freshly initialized transient crawl state.
     */
    public Crawler() {
        addClassifier(new StandardClassifier());
        init();
    }

    /**
     * Resets every transient field to its "cleared" starting value. Called
     * from the constructor and again after deserialization, because none of
     * these fields are written to the serialized form.
     */
    private void init() {
        // Crawl status and counters.
        state = 2;
        numLinksTested = 0;
        numPagesVisited = 0;
        numPagesLeft = 0;
        worms = null;

        // Work queues.
        crawlQueue = new PriorityQueue();
        fetchQueue = new PriorityQueue();

        // Listener registries and visited-page bookkeeping.
        crawlListeners = new Vector();
        linkListeners = new Vector();
        visitedPages = new Hashtable();

        robotExclusion = new RobotExclusion(getName());
    }

    /**
     * Serialization hook. The transient {@code roots} array is converted to
     * URL strings in {@code rootHrefs} for the duration of the default write,
     * after which {@code rootHrefs} is dropped again.
     *
     * @param out stream the object is being written to
     * @throws IOException if the default field write fails
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        if (roots == null) {
            rootHrefs = null;
        } else {
            rootHrefs = new String[roots.length];
            for (int k = 0; k < roots.length; ++k) {
                rootHrefs[k] = roots[k].getURL().toString();
            }
        }
        out.defaultWriteObject();
        rootHrefs = null;
    }

    /**
     * Deserialization hook. Rebuilds {@code roots} from the stored URL
     * strings, swaps {@code domain} and {@code type} for the shared constant
     * arrays when their contents match, re-creates the transient state and
     * finally tells the predicates and the action that they are attached to
     * this crawler.
     *
     * @param in stream the object is being read from
     * @throws IOException if reading fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();

        if (rootHrefs == null) {
            roots = null;
        } else {
            roots = new Link[rootHrefs.length];
            for (int k = 0; k < rootHrefs.length; ++k) {
                roots[k] = new Link(rootHrefs[k]);
            }
        }

        // Restore reference identity with the predefined constants.
        domain = Crawler.useStandard(WEB, domain);
        domain = Crawler.useStandard(SERVER, domain);
        domain = Crawler.useStandard(SUBTREE, domain);
        type = Crawler.useStandard(HYPERLINKS, type);
        type = Crawler.useStandard(HYPERLINKS_AND_IMAGES, type);
        type = Crawler.useStandard(ALL_LINKS, type);

        init();

        if (linkPredicate != null) {
            linkPredicate.connected(this);
        }
        if (pagePredicate != null) {
            pagePredicate.connected(this);
        }
        if (action != null) {
            action.connected(this);
        }
    }

    /**
     * Returns {@code standard} when {@code s} is a different array holding
     * equal strings in the same order; otherwise returns {@code s} unchanged.
     *
     * @param standard the canonical array (may be null)
     * @param s the array to canonicalize (may be null)
     * @return {@code standard} on an element-wise match, else {@code s}
     */
    private static String[] useStandard(String[] standard, String[] s) {
        boolean worthComparing = s != null
                && standard != null
                && standard != s
                && s.length == standard.length;
        if (!worthComparing) {
            return s;
        }
        for (int k = 0; k < s.length; ++k) {
            if (!s[k].equals(standard[k])) {
                return s;
            }
        }
        return standard;
    }

    /**
     * Runs the crawl on the calling thread and returns when the crawl is no
     * longer in the "started" state (stopped, paused or timed out) or the
     * thread is interrupted.
     *
     * <p>State codes used here: 0 = started, 1 = stopped, 2 = cleared,
     * 4 = paused. A stopped crawler is cleared first and then restarted from
     * its roots; a cleared crawler submits its roots; in any other state the
     * existing queues are used as they are.
     *
     * <p>If the thread is interrupted while waiting, the loop ends, the worms
     * are shut down and the interrupt status is restored before returning.
     * The state field is left untouched in that case.
     */
    public void run() {
        // Clear BEFORE capturing the roots: clear() sets crawledRoots to null,
        // so capturing first would leave a restarted crawl with nothing to submit.
        if (state == 1) {
            clear();
        }
        crawledRoots = roots;

        if (state == 2 && crawledRoots != null) {
            // Spread the root priorities evenly over [0, 1) in array order.
            float priority = 0.0f;
            float increment = 1.0f / (float) crawledRoots.length;
            for (int i = 0; i < crawledRoots.length; ++i) {
                crawledRoots[i].setPriority(priority);
                priority += increment;
            }
            submit(crawledRoots);
        }

        state = 0;
        sendCrawlEvent(state);

        boolean interrupted = false;
        synchronized (crawlQueue) {
            CrawlTimer timer = new CrawlTimer(this);
            int timeout = dp.getCrawlTimeout();
            if (timeout > 0) {
                timer.set(timeout * 1000, false);
            }

            // Start the worker threads (at least one).
            int nWorms = Math.max(dp.getMaxThreads(), 1);
            worms = new Worm[nWorms];
            for (int i = 0; i < nWorms; ++i) {
                worms[i] = new Worm(this, i);
                worms[i].start();
            }

            try {
                while (state == 0) {
                    if (numPagesLeft == 0) {
                        // Nothing left to do: the crawl is finished.
                        state = 1;
                        sendCrawlEvent(state);
                        continue;
                    }
                    if (synchronous) {
                        // Synchronous mode: this thread processes pages itself,
                        // always taking the head of the crawl queue once it
                        // has reached status 7 (set by fetch() after download).
                        Link link = (Link) crawlQueue.getMin();
                        if (link.getStatus() == 7) {
                            process(link);
                            continue;
                        }
                        crawlQueue.wait();
                        continue;
                    }
                    crawlQueue.wait();
                }
            } catch (InterruptedException e) {
                // Remember the interrupt; it is re-asserted after cleanup so
                // the shutdown calls below are not affected by the flag.
                interrupted = true;
            }

            timer.cancel();
            for (int i = 0; i < worms.length; ++i) {
                worms[i].die();
            }

            if (state == 4) {
                // Paused: put the links the worms were working on back on the
                // fetch queue so a later run() picks them up again.
                synchronized (fetchQueue) {
                    for (int i = 0; i < worms.length; ++i) {
                        if (worms[i].link != null) {
                            fetchQueue.put(worms[i].link);
                        }
                    }
                }
            }
            worms = null;
        }

        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Stops the crawl if necessary and discards its results: counters and
     * the visited set are reset, the crawled roots are disconnected and
     * forgotten, and listeners receive a "cleared" event (state 2).
     */
    public void clear() {
        stop();

        numPagesVisited = 0;
        numLinksTested = 0;
        clearVisited();

        if (crawledRoots != null) {
            for (int k = 0; k < crawledRoots.length; ++k) {
                crawledRoots[k].disconnect();
            }
        }
        crawledRoots = null;

        state = 2;
        sendCrawlEvent(state);
    }

    /**
     * Pauses a running crawl. Does nothing unless the state is 0 (started);
     * otherwise switches to state 4 (paused), wakes the thread waiting in
     * {@link #run()} and notifies the crawl listeners.
     */
    public void pause() {
        if (state != 0) {
            return;
        }
        synchronized (crawlQueue) {
            state = 4;
            crawlQueue.notify();
        }
        sendCrawlEvent(state);
    }

    /**
     * Stops a running (state 0) or paused (state 4) crawl: both queues are
     * emptied, the pending-page count is zeroed, the state becomes 1
     * (stopped), the thread waiting in {@link #run()} is woken and the crawl
     * listeners are notified. In any other state this is a no-op.
     */
    public void stop() {
        boolean active = state == 0 || state == 4;
        if (!active) {
            return;
        }
        // Lock order: crawlQueue first, then fetchQueue (same as submit()).
        synchronized (crawlQueue) {
            synchronized (fetchQueue) {
                state = 1;
                fetchQueue.clear();
                crawlQueue.clear();
                numPagesLeft = 0;
                crawlQueue.notify();
            }
        }
        sendCrawlEvent(state);
    }

    /**
     * Ends a running crawl because its overall time limit expired. Only acts
     * in state 0 (started): empties both queues, zeroes the pending-page
     * count, switches to state 3 (timed out), wakes the thread waiting in
     * {@link #run()} and notifies the crawl listeners.
     */
    void timedOut() {
        if (state != 0) {
            return;
        }
        // Lock order: crawlQueue first, then fetchQueue (same as submit()).
        synchronized (crawlQueue) {
            synchronized (fetchQueue) {
                state = 3;
                fetchQueue.clear();
                crawlQueue.clear();
                numPagesLeft = 0;
                crawlQueue.notify();
            }
        }
        sendCrawlEvent(state);
    }

    /**
     * @return the current crawl state code (0 started, 1 stopped, 2 cleared,
     *         3 timed out, 4 paused)
     */
    public int getState() {
        return state;
    }

    /**
     * Hook invoked by {@code process()} for every page accepted by the page
     * predicate. The default implementation is empty.
     *
     * @param page the page being visited
     */
    public void visit(Page page) {
        // Intentionally empty: subclasses may override.
    }

    /**
     * Hook consulted last by {@code expand()} when deciding whether to follow
     * a link. The default implementation accepts every link.
     *
     * @param l the candidate link
     * @return always {@code true} in this class
     */
    public boolean shouldVisit(Link l) {
        return true;
    }

    /**
     * Examines every link on {@code page} and either queues it or reports
     * why it was not followed. Each link is given a priority and this
     * crawler's download parameters, then ends up with exactly one outcome:
     * <ul>
     *   <li>status 2 - its page was already visited (only checked when
     *       {@code ignoreVisitedLinks} is set);</li>
     *   <li>status 1 - rejected by the link type, the domain, the link
     *       predicate or {@link #shouldVisit(Link)}, checked in that
     *       order;</li>
     *   <li>status 3 - the page's depth has reached {@code maxDepth};</li>
     *   <li>otherwise it is handed to {@link #submit(Link)}.</li>
     * </ul>
     *
     * @param page the page whose links are to be examined
     */
    public void expand(Page page) {
        Link[] links = page.getLinks();
        if (links == null || links.length == 0) {
            return;
        }

        // Depth-first crawls hand out ever smaller (negative) priorities,
        // breadth-first crawls ever larger ones; links on the same page are
        // spread over one unit in document order.
        float priority = depthFirst ? -numPagesVisited : numPagesVisited;
        float step = 1.0f / (float) links.length;

        for (int k = 0; k < links.length; ++k) {
            Link candidate = links[k];
            candidate.setPriority(priority);
            priority += step;
            candidate.setDownloadParameters(dp);
            ++numLinksTested;

            if (ignoreVisitedLinks && visited(candidate)) {
                sendLinkEvent(candidate, 2);
                continue;
            }

            // Short-circuit order matters: the predicate and shouldVisit()
            // are user code and are only consulted if earlier checks pass.
            boolean rejected = (type != null && !candidate.hasAnyLabels(type))
                    || (domain != null && !candidate.hasAnyLabels(domain))
                    || (linkPredicate != null && !linkPredicate.shouldVisit(candidate))
                    || !shouldVisit(candidate);
            if (rejected) {
                sendLinkEvent(candidate, 1);
                continue;
            }

            if (page.getDepth() >= maxDepth) {
                sendLinkEvent(candidate, 3);
                continue;
            }

            submit(candidate);
        }
    }

    /**
     * @return the number of pages passed through {@code process()} since the
     *         counters were last reset
     */
    public int getPagesVisited() {
        return numPagesVisited;
    }

    /**
     * @return the number of links examined by {@code expand()} since the
     *         counters were last reset
     */
    public int getLinksTested() {
        return numLinksTested;
    }

    /**
     * @return the number of submitted links that have not yet been processed
     *         or failed
     */
    public int getPagesLeft() {
        return numPagesLeft;
    }

    /**
     * Counts the worms that currently hold a link.
     *
     * @return the number of busy worms, or 0 when no crawl is running
     */
    public int getActiveThreads() {
        // Snapshot the field: run() sets it to null when the crawl ends.
        Worm[] snapshot = worms;
        if (snapshot == null) {
            return 0;
        }
        int busy = 0;
        for (Worm worm : snapshot) {
            if (worm != null && worm.link != null) {
                ++busy;
            }
        }
        return busy;
    }

    /**
     * @return this crawler's name
     */
    public String getName() {
        return name;
    }

    /**
     * Changes this crawler's name. Only the name field is updated here.
     *
     * @param name the new name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the crawler's name, as reported by {@link #getName()}
     */
    public String toString() {
        return getName();
    }

    /**
     * @return a copy of the configured root links; an empty array when none
     *         are set
     */
    public Link[] getRoots() {
        if (roots != null) {
            Link[] copy = new Link[roots.length];
            System.arraycopy(roots, 0, copy, 0, copy.length);
            return copy;
        }
        return new Link[0];
    }

    /**
     * @return a copy of the roots used by the current or most recent crawl,
     *         or {@code null} when there are none (for example after
     *         {@link #clear()})
     */
    public Link[] getCrawledRoots() {
        if (crawledRoots != null) {
            Link[] copy = new Link[crawledRoots.length];
            System.arraycopy(crawledRoots, 0, copy, 0, copy.length);
            return copy;
        }
        return null;
    }

    /**
     * @return the external form of every root URL, separated by newlines;
     *         the empty string when no roots are set
     */
    public String getRootHrefs() {
        StringBuffer joined = new StringBuffer();
        if (roots == null) {
            return joined.toString();
        }
        for (int k = 0; k < roots.length; ++k) {
            // A separator is added only once something has been written.
            if (joined.length() > 0) {
                joined.append('\n');
            }
            joined.append(roots[k].getURL().toExternalForm());
        }
        return joined.toString();
    }

    /**
     * Replaces the roots with links built from a whitespace-separated list
     * of URLs. The existing roots are only replaced once every token has
     * been turned into a link.
     *
     * @param hrefs URLs separated by whitespace
     * @throws MalformedURLException if a token cannot be turned into a link
     */
    public void setRootHrefs(String hrefs) throws MalformedURLException {
        Vector<Link> parsed = new Vector<Link>();
        for (StringTokenizer tokens = new StringTokenizer(hrefs); tokens.hasMoreElements(); ) {
            parsed.addElement(new Link(tokens.nextToken()));
        }
        Link[] replacement = new Link[parsed.size()];
        this.roots = replacement;
        parsed.copyInto(replacement);
    }

    /**
     * Makes {@code link} the only root.
     *
     * @param link the new sole root
     */
    public void setRoot(Link link) {
        roots = new Link[] { link };
    }

    /**
     * Replaces the roots with a copy of {@code links}; later changes to the
     * caller's array do not affect the crawler.
     *
     * @param links the new roots (must not be null)
     */
    public void setRoots(Link[] links) {
        Link[] copy = new Link[links.length];
        this.roots = copy;
        System.arraycopy(links, 0, copy, 0, links.length);
    }

    /**
     * Appends {@code link} to the roots, creating the root list if none
     * exists yet.
     *
     * @param link the root to add
     */
    public void addRoot(Link link) {
        if (roots == null) {
            setRoot(link);
            return;
        }
        int oldCount = roots.length;
        Link[] grown = new Link[oldCount + 1];
        System.arraycopy(roots, 0, grown, 0, oldCount);
        grown[oldCount] = link;
        roots = grown;
    }

    /**
     * @return the domain label array itself (not a copy); may be null
     */
    public String[] getDomain() {
        return domain;
    }

    /**
     * Sets the labels a link must carry to be followed. The array is stored
     * as given, without copying.
     *
     * @param domain label array, or null to disable the domain check
     */
    public void setDomain(String[] domain) {
        this.domain = domain;
    }

    /**
     * @return the link-type label array itself (not a copy); may be null
     */
    public String[] getLinkType() {
        return type;
    }

    /**
     * Sets the link-type labels a link must carry to be followed. The array
     * is stored as given, without copying.
     *
     * @param type label array, or null to disable the link-type check
     */
    public void setLinkType(String[] type) {
        this.type = type;
    }

    /**
     * @return {@code true} if links are prioritized depth-first
     */
    public boolean getDepthFirst() {
        return depthFirst;
    }

    /**
     * Chooses between depth-first and breadth-first link prioritization.
     *
     * @param useDFS {@code true} for depth-first, {@code false} for
     *               breadth-first
     */
    public void setDepthFirst(boolean useDFS) {
        depthFirst = useDFS;
    }

    /**
     * @return {@code true} if pages are processed by the thread executing
     *         {@code run()} rather than by the worms
     */
    public boolean getSynchronous() {
        return synchronous;
    }

    /**
     * Selects whether pages are processed by the thread executing
     * {@code run()} ({@code true}) or by the worms ({@code false}).
     *
     * @param f the new synchronous-mode flag
     */
    public void setSynchronous(boolean f) {
        synchronous = f;
    }

    /**
     * @return {@code true} if links to already-visited pages are skipped
     */
    public boolean getIgnoreVisitedLinks() {
        return ignoreVisitedLinks;
    }

    /**
     * Selects whether links to already-visited pages are skipped.
     *
     * @param f {@code true} to skip such links
     */
    public void setIgnoreVisitedLinks(boolean f) {
        ignoreVisitedLinks = f;
    }

    /**
     * @return the page depth at which links are no longer followed
     */
    public int getMaxDepth() {
        return maxDepth;
    }

    /**
     * Sets the page depth at which links are no longer followed.
     *
     * @param maxDepth the new depth limit
     */
    public void setMaxDepth(int maxDepth) {
        this.maxDepth = maxDepth;
    }

    /**
     * @return the download parameters this crawler applies to links
     */
    public DownloadParameters getDownloadParameters() {
        return dp;
    }

    /**
     * Replaces the download parameters this crawler applies to links.
     *
     * @param dp the new download parameters
     */
    public void setDownloadParameters(DownloadParameters dp) {
        this.dp = dp;
    }

    /**
     * Installs a link predicate. Nothing happens if {@code pred} is the
     * current predicate or equal to it; otherwise the old predicate (if any)
     * is told it was disconnected and the new one (if any) that it is
     * connected.
     *
     * @param pred the new predicate, or null for none
     */
    public void setLinkPredicate(LinkPredicate pred) {
        boolean unchanged = pred == linkPredicate
                || (pred != null && pred.equals(linkPredicate));
        if (unchanged) {
            return;
        }
        if (linkPredicate != null) {
            linkPredicate.disconnected(this);
        }
        linkPredicate = pred;
        if (pred != null) {
            pred.connected(this);
        }
    }

    /**
     * @return the installed link predicate, or null if none
     */
    public LinkPredicate getLinkPredicate() {
        return linkPredicate;
    }

    /**
     * Installs a page predicate. Nothing happens if {@code pred} is the
     * current predicate or equal to it; otherwise the old predicate (if any)
     * is told it was disconnected and the new one (if any) that it is
     * connected.
     *
     * @param pred the new predicate, or null for none
     */
    public void setPagePredicate(PagePredicate pred) {
        boolean unchanged = pred == pagePredicate
                || (pred != null && pred.equals(pagePredicate));
        if (unchanged) {
            return;
        }
        if (pagePredicate != null) {
            pagePredicate.disconnected(this);
        }
        pagePredicate = pred;
        if (pred != null) {
            pred.connected(this);
        }
    }

    /**
     * @return the installed page predicate, or null if none
     */
    public PagePredicate getPagePredicate() {
        return pagePredicate;
    }

    /**
     * Installs the action run on accepted pages. Nothing happens if
     * {@code act} is the current action or equal to it; otherwise the old
     * action (if any) is told it was disconnected and the new one (if any)
     * that it is connected.
     *
     * @param act the new action, or null for none
     */
    public void setAction(Action act) {
        boolean unchanged = act == action
                || (act != null && act.equals(action));
        if (unchanged) {
            return;
        }
        if (action != null) {
            action.disconnected(this);
        }
        action = act;
        if (act != null) {
            act.connected(this);
        }
    }

    /**
     * @return the installed action, or null if none
     */
    public Action getAction() {
        return action;
    }

    /**
     * Queues a link for crawling. The link's page is marked visited, a link
     * event with status 4 is sent, and the link is placed on both the crawl
     * queue and the fetch queue; all threads waiting on the fetch queue are
     * woken.
     *
     * @param link the link to crawl
     */
    public void submit(Link link) {
        markVisited(link);
        sendLinkEvent(link, 4);
        // Lock order: crawlQueue first, then fetchQueue.
        synchronized (crawlQueue) {
            synchronized (fetchQueue) {
                crawlQueue.put(link);
                ++numPagesLeft;
                fetchQueue.put(link);
                fetchQueue.notifyAll();
            }
        }
    }

    /**
     * Queues every link of the array, in array order.
     *
     * @param links the links to crawl (must not be null)
     */
    public void submit(Link[] links) {
        for (int k = 0; k < links.length; ++k) {
            submit(links[k]);
        }
    }

    /**
     * @return an enumeration over the elements of the crawl queue
     */
    public Enumeration enumerateQueue() {
        return crawlQueue.elements();
    }

    /**
     * Registers a classifier unless it is already registered. The list is
     * kept sorted by ascending priority; a new classifier goes in front of
     * the first one with a strictly greater priority, or at the end.
     *
     * @param c the classifier to add
     */
    public void addClassifier(Classifier c) {
        if (classifiers.contains(c)) {
            return;
        }
        float newPriority = c.getPriority();
        for (int slot = 0; slot < classifiers.size(); ++slot) {
            Classifier existing = (Classifier) classifiers.elementAt(slot);
            if (newPriority < existing.getPriority()) {
                classifiers.insertElementAt(c, slot);
                return;
            }
        }
        // No classifier with a greater priority was found.
        classifiers.addElement(c);
    }

    /**
     * Unregisters a classifier; has no effect if it is not registered.
     *
     * @param c the classifier to remove
     */
    public void removeClassifier(Classifier c) {
        classifiers.removeElement(c);
    }

    /**
     * Unregisters every classifier.
     */
    public void removeAllClassifiers() {
        classifiers.removeAllElements();
    }

    /**
     * @return an enumeration over the registered classifiers in list order
     */
    public Enumeration enumerateClassifiers() {
        return classifiers.elements();
    }

    /**
     * Returns the registered classifiers as a new array, in list order.
     *
     * @return a freshly allocated array of the registered classifiers
     */
    public Classifier[] getClassifiers() {
        // The local must be typed Classifier[]: an Object[] local cannot be
        // returned from a method declared to return Classifier[].
        Classifier[] result = new Classifier[classifiers.size()];
        classifiers.copyInto(result);
        return result;
    }

    /**
     * Registers a crawl listener unless it is already registered.
     *
     * @param listen the listener to add
     */
    public void addCrawlListener(CrawlListener listen) {
        if (crawlListeners.contains(listen)) {
            return;
        }
        crawlListeners.addElement(listen);
    }

    /**
     * Unregisters a crawl listener; has no effect if it is not registered.
     *
     * @param listen the listener to remove
     */
    public void removeCrawlListener(CrawlListener listen) {
        crawlListeners.removeElement(listen);
    }

    /**
     * Registers a link listener unless it is already registered.
     *
     * @param listen the listener to add
     */
    public void addLinkListener(LinkListener listen) {
        if (linkListeners.contains(listen)) {
            return;
        }
        linkListeners.addElement(listen);
    }

    /**
     * Unregisters a link listener; has no effect if it is not registered.
     *
     * @param listen the listener to remove
     */
    public void removeLinkListener(LinkListener listen) {
        linkListeners.removeElement(listen);
    }

    /**
     * Delivers one crawl event to every registered crawl listener, calling
     * the callback that matches {@code id}: 0 started, 1 stopped, 2 cleared,
     * 3 timedOut, 4 paused. Any other id creates the event but calls no
     * callback.
     *
     * @param id the crawl state code to report
     */
    protected void sendCrawlEvent(int id) {
        CrawlEvent event = new CrawlEvent(this, id);
        // The listener count is read once, before the first callback runs.
        final int count = crawlListeners.size();
        for (int k = 0; k < count; ++k) {
            CrawlListener listener = (CrawlListener) crawlListeners.elementAt(k);
            switch (id) {
                case 0:
                    listener.started(event);
                    break;
                case 1:
                    listener.stopped(event);
                    break;
                case 2:
                    listener.cleared(event);
                    break;
                case 3:
                    listener.timedOut(event);
                    break;
                case 4:
                    listener.paused(event);
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Records {@code id} as the link's status and reports the change to
     * every registered link listener.
     *
     * @param l the link whose status changed
     * @param id the new status code
     */
    protected void sendLinkEvent(Link l, int id) {
        LinkEvent event = new LinkEvent(this, id, l);
        l.setStatus(id);
        // The listener count is read once, before the first callback runs.
        final int count = linkListeners.size();
        for (int k = 0; k < count; ++k) {
            ((LinkListener) linkListeners.elementAt(k)).crawled(event);
        }
    }

    /**
     * Records {@code id} as the link's status, stores the exception's string
     * form under the link label {@code "exception"}, and reports the change
     * to every registered link listener.
     *
     * @param l the link whose status changed
     * @param id the new status code
     * @param exception the failure to attach (must not be null)
     */
    protected void sendLinkEvent(Link l, int id, Throwable exception) {
        LinkEvent event = new LinkEvent(this, id, l, exception);
        l.setStatus(id);
        l.setLabel("exception", exception.toString());
        // The listener count is read once, before the first callback runs.
        final int count = linkListeners.size();
        for (int k = 0; k < count; ++k) {
            ((LinkListener) linkListeners.elementAt(k)).crawled(event);
        }
    }

    /**
     * @param link the link to test
     * @return {@code true} if the link's page URL has been marked visited
     */
    public boolean visited(Link link) {
        String key = link.getPageURL().toString();
        return visitedPages.containsKey(key);
    }

    /**
     * Marks the link's page URL as visited.
     *
     * @param link the link whose page URL is recorded
     */
    protected void markVisited(Link link) {
        String key = link.getPageURL().toString();
        // Only the key matters; the crawler itself serves as a non-null value.
        visitedPages.put(key, this);
    }

    /**
     * Forgets every page URL that was marked visited.
     */
    protected void clearVisited() {
        visitedPages.clear();
    }

    /**
     * Main loop of a worm thread: repeatedly takes the next link from the
     * fetch queue, downloads its page and either processes it (asynchronous
     * mode) or wakes the thread in {@code run()} to do so (synchronous mode).
     * Returns as soon as the worm is marked dead.
     *
     * <p>Link status codes sent from here: 5 before the download starts,
     * 7 once the page has been downloaded, 6 when the download or the
     * processing fails.
     *
     * <p>All {@code wait()}/{@code notify()} calls and all updates of
     * {@code numPagesLeft} happen while holding the monitor of the queue
     * concerned, as the Object.wait/notify contract requires.
     *
     * @param w the worm executing this loop
     */
    void fetch(Worm w) {
        WormTimer timer = new WormTimer(w);
        while (!w.dead) {
            // Block until a link is available or the worm is told to die.
            synchronized (fetchQueue) {
                while (!w.dead && (w.link = (Link) fetchQueue.deleteMin()) == null) {
                    try {
                        fetchQueue.wait();
                    } catch (InterruptedException ignored) {
                        // Deliberately ignored: the loop condition re-checks
                        // w.dead, which is how a worm is asked to stop.
                    }
                }
            }
            if (w.dead) {
                return;
            }

            try {
                // Per-link parameters take precedence over the crawler's.
                DownloadParameters params = w.link.getDownloadParameters();
                if (params == null) {
                    params = dp;
                }
                int timeout = params.getDownloadTimeout();
                sendLinkEvent(w.link, 5);
                try {
                    if (timeout > 0) {
                        timer.set(timeout * 1000, false);
                    }
                    if (params.getObeyRobotExclusion() && robotExclusion.disallowed(w.link.getURL())) {
                        throw new IOException("disallowed by Robot Exclusion Standard (robots.txt)");
                    }
                    // The Page object is not used directly here; process()
                    // later obtains it through link.getPage().
                    new Page(w.link, params);
                } finally {
                    // Always disarm the download timer, success or failure.
                    timer.cancel();
                }

                if (w.dead) {
                    return;
                }
                sendLinkEvent(w.link, 7);

                if (synchronous) {
                    // The thread in run() processes the page; wake it up.
                    synchronized (crawlQueue) {
                        crawlQueue.notify();
                    }
                } else {
                    process(w.link);
                }
                w.link = null;
            } catch (ThreadDeath e) {
                // Must keep propagating so the thread actually dies.
                throw e;
            } catch (Throwable e) {
                // Download failure or an error in user code: report it on the
                // link and retire the link from the crawl.
                if (w.dead) {
                    return;
                }
                sendLinkEvent(w.link, 6, e);
                synchronized (crawlQueue) {
                    crawlQueue.delete(w.link);
                    --numPagesLeft;
                    w.link = null;
                    crawlQueue.notify();
                }
            }
        }
    }

    /**
     * Handles a downloaded page: runs every classifier on it, counts it as
     * visited, applies the action and {@link #visit(Page)} if the page
     * predicate (when set) accepts it, expands its links, sends a link event
     * with status 8 and finally removes the link from the crawl queue,
     * waking a thread waiting on that queue.
     *
     * @param link the link whose page has been downloaded
     */
    void process(Link link) {
        Page page = link.getPage();

        // The classifier count is read once, before the first one runs.
        final int count = classifiers.size();
        for (int k = 0; k < count; ++k) {
            ((Classifier) classifiers.elementAt(k)).classify(page);
        }

        ++numPagesVisited;

        boolean accepted = pagePredicate == null || pagePredicate.shouldActOn(page);
        if (accepted) {
            if (action != null) {
                action.visit(page);
            }
            visit(page);
        }

        expand(page);
        sendLinkEvent(link, 8);

        synchronized (crawlQueue) {
            crawlQueue.delete(link);
            --numPagesLeft;
            crawlQueue.notify();
        }
    }

    /**
     * Called when a worm's download exceeded its time limit. Unless the worm
     * is already dead, it is killed, its link is reported as failed
     * (status 6) and removed from the crawl queue, and a new worm is started
     * in the same slot.
     *
     * @param w the worm whose download timed out
     * @param interval the time limit, used in the error message
     */
    void fetchTimedOut(Worm w, int interval) {
        if (w.dead) {
            return;
        }
        w.die();
        sendLinkEvent(w.link, 6, new IOException("Timeout after " + interval + " seconds"));
        synchronized (crawlQueue) {
            crawlQueue.delete(w.link);
            --numPagesLeft;
            // Replace the dead worm so the number of workers stays constant.
            Worm replacement = new Worm(this, w.i);
            worms[w.i] = replacement;
            replacement.start();
            crawlQueue.notify();
        }
    }

    /**
     * Command-line entry point: loads a serialized crawler from the file
     * named by the first argument, attaches an event log to it and runs it
     * on the calling thread.
     *
     * <p>NOTE(review): this uses Java native deserialization, so it should
     * only be pointed at trusted files.
     *
     * @param args {@code args[0]} is the path of the serialized crawler
     * @throws IllegalArgumentException if no file name is given
     * @throws Exception if the file cannot be read or deserialized
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("usage: Crawler <serialized-crawler-file>");
        }
        Crawler loadedCrawler;
        FileInputStream fileIn = new FileInputStream(args[0]);
        try {
            ObjectInputStream in = new ObjectInputStream(fileIn);
            loadedCrawler = (Crawler) in.readObject();
        } finally {
            // Closing the file stream releases the only underlying resource,
            // even when the stream header or the object data is invalid.
            fileIn.close();
        }
        EventLog.monitor(loadedCrawler).setOnlyNetworkEvents(false);
        loadedCrawler.run();
    }

    // Initializes the predefined domain and link-type label sets.
    // serialVersionUID is NOT assigned here: it is a final field already
    // initialized at its declaration, and assigning it again is a compile error.
    static {
        // null means "no restriction" (expand() skips the check for a null set).
        WEB = null;
        SERVER = new String[]{"local"};
        SUBTREE = new String[]{"sibling", "descendent"};
        HYPERLINKS = new String[]{"hyperlink"};
        HYPERLINKS_AND_IMAGES = new String[]{"hyperlink", "image"};
        ALL_LINKS = null;
    }
}

