package cn.edu.hfut.dmic.webcollector.example;

import cn.edu.hfut.dmic.webcollector.fetcher.NextFilter;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.rocks.BreadthCrawler;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/example/DemoNextFilter.class */
/**
 * Example crawler showing how a {@link NextFilter} can be used to restrict and tag the
 * links that are scheduled for the next crawl round.
 *
 * <p>The crawler is seeded with the CSDN blog front page. The filter installed in
 * {@link #main(String[])} only lets article-detail URLs through and tags them with the
 * {@code "content"} type; {@link #visit(Page, CrawlDatums)} then prints the title and
 * author of every page carrying that type.
 */
public class DemoNextFilter extends BreadthCrawler {

    /** Type tag set by the filter and checked again in {@link #visit(Page, CrawlDatums)}. */
    private static final String CONTENT_TYPE = "content";

    /** URL pattern a candidate must match to be kept by the filter. */
    private static final String ARTICLE_URL_REGEX = "https://blog.csdn.net/.*/article/details/.*";

    /** CSS selector for the article title element. */
    private static final String TITLE_SELECTOR = "h1.title-article";

    /** CSS selector for the article author element. */
    private static final String AUTHOR_SELECTOR = "p.name>a.text-truncate";

    /**
     * Creates the crawler, registers the seed URL, accepts every URL via the regex rule
     * {@code ".*"} and configures 30 threads.
     *
     * @param str forwarded unchanged to the {@link BreadthCrawler} constructor
     *            (presumably the crawl storage path - confirm against the library docs)
     * @param z   forwarded unchanged to the {@link BreadthCrawler} constructor
     *            (presumably the auto-parse flag - confirm against the library docs)
     */
    public DemoNextFilter(String str, boolean z) {
        super(str, z);
        addSeed("https://blog.csdn.net/");
        addRegex(".*");
        setThreads(30);
    }

    /**
     * Prints the title and author of pages tagged with the {@code "content"} type.
     *
     * <p>Pages that lack either the title or the author element are skipped instead of
     * failing with a {@link NullPointerException}.
     *
     * @param page        the fetched page
     * @param crawlDatums collector for follow-up tasks; not used by this example
     */
    @Override
    public void visit(Page page, CrawlDatums crawlDatums) {
        if (!page.matchType(CONTENT_TYPE)) {
            return;
        }
        String title = firstText(page, TITLE_SELECTOR);
        String author = firstText(page, AUTHOR_SELECTOR);
        if (title == null || author == null) {
            // The page matched the article URL pattern but does not have the expected
            // layout (e.g. removed article or changed markup); nothing sensible to print.
            return;
        }
        System.out.println("title:" + title + "\tauthor:" + author);
    }

    /**
     * Returns the text of the first element matching {@code cssSelector}, or {@code null}
     * when the page contains no such element.
     *
     * <p>The selection is evaluated twice on purpose: this avoids naming the selection
     * result type, which this file does not import.
     */
    private static String firstText(Page page, String cssSelector) {
        if (page.select(cssSelector).isEmpty()) {
            return null;
        }
        return page.select(cssSelector).first().text();
    }

    /**
     * Builds the crawler with the {@code "crawl"} / {@code true} constructor arguments,
     * installs the article-only filter and starts the crawl with the argument {@code 2}.
     *
     * @param strArr command-line arguments; ignored
     * @throws Exception propagated from {@code start}
     */
    public static void main(String[] strArr) throws Exception {
        DemoNextFilter crawler = new DemoNextFilter("crawl", true);
        crawler.setNextFilter(new NextFilter() {
            /**
             * Keeps only article-detail URLs and tags them as content.
             *
             * @param nextItem the candidate task being filtered
             * @param referer  second argument supplied by the framework (presumably the
             *                 task the candidate was discovered from); unused here
             * @return the tagged candidate, or {@code null} for every non-article URL
             */
            @Override
            public CrawlDatum filter(CrawlDatum nextItem, CrawlDatum referer) {
                if (!nextItem.matchUrl(ARTICLE_URL_REGEX)) {
                    return null;
                }
                nextItem.type(CONTENT_TYPE);
                return nextItem;
            }
        });
        crawler.start(2);
    }
}
