/**
 * Entry point: creates a {@code Spider} around a {@code ZipCodePageProcessor},
 * attaches a {@code PriorityScheduler}, adds the start URL and calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    final String startUrl = "http://www.ip138.com/post/";

    Spider.create(new ZipCodePageProcessor())
            .scheduler(new PriorityScheduler())
            .addUrl(startUrl)
            .run();
}
// Closes a scope that was opened before this method (its header is not visible here).
}
/**
 * Entry point: creates an {@code OOSpider} for the {@code News163} model, adds a
 * single article URL, attaches a {@code RedisScheduler} created with "localhost",
 * registers a {@code MultiPagePipeline} and then a {@code ConsolePipeline}, and
 * calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    final String startUrl = "http://news.163.com/13/0802/05/958I1E330001124J_2.html";
    final String redisHost = "localhost";

    OOSpider.create(Site.me(), News163.class)
            .addUrl(startUrl)
            .scheduler(new RedisScheduler(redisHost))
            .addPipeline(new MultiPagePipeline())
            .addPipeline(new ConsolePipeline())
            .run();
}
/**
 * Entry point: configures a {@code Site} (domain, charset, sleep time, timeout,
 * user agent), creates an {@code OOSpider} for {@code JokejiModel} with a
 * {@code ConsolePageModelPipeline}, adds the start URL, sets the thread count to 2,
 * attaches a {@code RedisScheduler} created with "127.0.0.1", and calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    final String domain = "www.jokeji.cn";
    final String charset = "gbk";
    final String userAgent =
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)";
    final String startUrl = "http://www.jokeji.cn/";
    final String redisHost = "127.0.0.1";

    OOSpider.create(
                    Site.me()
                            .setDomain(domain)
                            .setCharset(charset)
                            .setSleepTime(100)
                            .setTimeOut(3000)
                            .setUserAgent(userAgent),
                    new ConsolePageModelPipeline(),
                    JokejiModel.class)
            .addUrl(startUrl)
            .thread(2)
            .scheduler(new RedisScheduler(redisHost))
            .run();
}
/**
 * Entry point: builds a {@code JedisPool} for "127.0.0.1" with a default-constructed
 * {@code JedisPoolConfig}, then creates a {@code Spider} around a
 * {@code UserDetailInfoProcessor}, adds {@code START_URL}, attaches a
 * {@code RedisScheduler} backed by that pool, sets the thread count to 1 and
 * calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    JedisPoolConfig poolConfig = new JedisPoolConfig();
    JedisPool jedisPool = new JedisPool(poolConfig, "127.0.0.1");

    Spider.create(new UserDetailInfoProcessor())
            .addUrl(START_URL)
            .scheduler(new RedisScheduler(jedisPool))
            .thread(1)
            .run();
}
public void crawl() { JedisPool pool = new JedisPool(new JedisPoolConfig(), "127.0.0.1"); pool.getResource().select(2); Spider.create(new UserDetailInfoProcessor()) .addUrl(START_URL) .addPipeline(userDetailInfoPipeline) //.setScheduler(new FileCacheQueueScheduler("/usr/zhihu/cache")) //.setDownloader(new HttpClientDownloaderExtend("/about")) .scheduler(new RedisSchedulerExtend2(pool,1,QueueNameConstant.QUEUE_USER_DETAIL_INFO)) .thread(1).run(); }
/**
 * Entry point: creates a {@code Spider} around a {@code ZipCodePageProcessor},
 * attaches a {@code PriorityScheduler}, adds the start URL and calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    final String startUrl = "http://www.ip138.com/post/";

    Spider.create(new ZipCodePageProcessor())
            .scheduler(new PriorityScheduler())
            .addUrl(startUrl)
            .run();
}
// Closes a scope that was opened before this method (its header is not visible here).
}
/**
 * Calls {@code RedisUtil.init()}, loads every {@code BookTag} via
 * {@code bookTagMapper.selectAll()}, turns each tag's URL into a {@code Request}
 * with priority 0, and hands those requests to a {@code Spider} built around a
 * {@code BookInfoProcessor}. The spider gets {@code bookInfoPipeline}, a
 * {@code RedisScheduler} built from {@code pool}, the integer parsed from the
 * "redis.index" property and {@code QueueNameConstant.QUEUE_BOOK_INFO}, a thread
 * count of 1, and is then started with {@code run()}.
 */
public void crawl() {
    RedisUtil.init();

    List<BookTag> tags = bookTagMapper.selectAll();
    Request[] seedRequests = new Request[tags.size()];
    for (int idx = 0; idx < seedRequests.length; idx++) {
        BookTag tag = tags.get(idx);
        seedRequests[idx] = new Request(tag.getUrl()).setPriority(0);
    }

    Spider.create(new BookInfoProcessor())
            .addRequest(seedRequests)
            // .addUrl("http://www.ip138.com/")
            .addPipeline(bookInfoPipeline)
            // The index is parsed inline so it is evaluated at the same point in the chain.
            .scheduler(new RedisScheduler(
                    pool,
                    Integer.parseInt(ConfigUtil.getProperty("redis", "redis.index")),
                    QueueNameConstant.QUEUE_BOOK_INFO))
            .thread(1)
            .run();
}
public static void main(String[] args)
/**
 * Entry point: creates an {@code OOSpider} for the {@code News163} model, adds a
 * single article URL, attaches a {@code RedisScheduler} created with "localhost",
 * registers a {@code MultiPagePipeline} and then a {@code ConsolePipeline}, and
 * calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    final String startUrl = "http://news.163.com/13/0802/05/958I1E330001124J_2.html";
    final String redisHost = "localhost";

    OOSpider.create(Site.me(), News163.class)
            .addUrl(startUrl)
            .scheduler(new RedisScheduler(redisHost))
            .addPipeline(new MultiPagePipeline())
            .addPipeline(new ConsolePipeline())
            .run();
}
/**
 * Entry point: configures a {@code Site} (domain, charset, sleep time, timeout,
 * user agent), creates an {@code OOSpider} for {@code JokejiModel} with a
 * {@code ConsolePageModelPipeline}, adds the start URL, sets the thread count to 2,
 * attaches a {@code RedisScheduler} created with "127.0.0.1", and calls {@code run()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    final String domain = "www.jokeji.cn";
    final String charset = "gbk";
    final String userAgent =
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)";
    final String startUrl = "http://www.jokeji.cn/";
    final String redisHost = "127.0.0.1";

    OOSpider.create(
                    Site.me()
                            .setDomain(domain)
                            .setCharset(charset)
                            .setSleepTime(100)
                            .setTimeOut(3000)
                            .setUserAgent(userAgent),
                    new ConsolePageModelPipeline(),
                    JokejiModel.class)
            .addUrl(startUrl)
            .thread(2)
            .scheduler(new RedisScheduler(redisHost))
            .run();
}