package cn.edu.hfut.dmic.webcollector.example;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.rocks.BreadthCrawler;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/example/DemoExceptionCrawler.class */
/**
 * Example crawler built on {@link BreadthCrawler} that is seeded with the
 * front page and listing pages 2 through 5 of {@code https://blog.github.com/},
 * and prints the URL, title and content of every visited page whose URL matches
 * {@link #ARTICLE_URL_REGEX}.
 */
public class DemoExceptionCrawler extends BreadthCrawler {
    /**
     * Pattern for article URLs: the blog root followed by a
     * {@code dddd-dd-dd-} digit prefix and a slug without slashes, ending in
     * {@code /}.
     *
     * <p>Shared by the rule registered via {@code addRegex} and by the filter
     * in {@link #visit(Page, CrawlDatums)} so that the two cannot drift apart.
     */
    private static final String ARTICLE_URL_REGEX =
            "https://blog.github.com/[0-9]{4}-[0-9]{2}-[0-9]{2}-[^/]+/";

    /**
     * Configures the crawler: registers the seed pages and the article URL
     * rule, sets the thread count to 50 and the configuration's topN to 100.
     *
     * @param str forwarded unchanged to the {@link BreadthCrawler} constructor
     *            (presumably the crawl storage path; confirm against the
     *            superclass)
     * @param z   forwarded unchanged to the {@link BreadthCrawler} constructor
     *            (presumably the auto-parse flag; confirm against the
     *            superclass)
     */
    public DemoExceptionCrawler(String str, boolean z) {
        super(str, z);
        addSeed("https://blog.github.com/");
        // Listing pages 2..5; the front page above stands in for page 1.
        for (int pageIndex = 2; pageIndex <= 5; pageIndex++) {
            addSeed(String.format("https://blog.github.com/page/%d/", pageIndex));
        }
        addRegex(ARTICLE_URL_REGEX);
        setThreads(50);
        getConf().setTopN(100);
    }

    /**
     * Prints the URL, title and content of the page when its URL matches
     * {@link #ARTICLE_URL_REGEX}; other pages are ignored here.
     *
     * @param page        the fetched page
     * @param crawlDatums not used by this implementation
     */
    @Override // cn.edu.hfut.dmic.webcollector.fetcher.Visitor
    public void visit(Page page, CrawlDatums crawlDatums) {
        if (!page.matchUrl(ARTICLE_URL_REGEX)) {
            return;
        }
        String url = page.url();
        // NOTE(review): if no element matches the selector, first() may yield
        // null and the chained text() call would then throw. Left as-is since
        // this class appears to be an exception-handling demo; confirm intent.
        String title = page.select("h1[class=lh-condensed]").first().text();
        String content = page.selectText("div.content.markdown-body");
        System.out.println("URL:\n" + url);
        System.out.println("title:\n" + title);
        System.out.println("content:\n" + content);
    }

    /**
     * Creates the crawler with the arguments {@code "crawl"} and {@code true}
     * and calls {@code start(4)} on it.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if starting the crawler fails
     */
    public static void main(String[] strArr) throws Exception {
        DemoExceptionCrawler crawler = new DemoExceptionCrawler("crawl", true);
        crawler.start(4);
    }
}
