@Override public List<String> process(File inItem) { List<String> outItems = null; try { BufferedReader in = new BufferedReader( new FileReader(inItem) ); String s; in.readLine();//pass first line s = in.readLine(); if (s != null) { Json json = new Json(s); outItems = json.jsonPath("$.data[*].[*]").all(); } in.close(); } catch (IOException e) { e.printStackTrace(); } return outItems; } }
/**
 * Extracts every {@code $.data[*].url_token} value from the followees content
 * of {@code inItem} and adds each one to {@code urlTokens}.
 *
 * @param inItem the file handed to {@code readFollowees}
 * @return always {@code null}; the tokens are collected in {@code urlTokens}
 */
@Override
public List<String> process(File inItem) {
    String content = readFollowees(inItem);
    if (StringUtils.isEmpty(content)) {
        // Nothing to parse for this item.
        return null;
    }
    new Json(content)
            .jsonPath("$.data[*].url_token")
            .all()
            .forEach(urlTokens::add);
    return null;
}
/**
 * Builds one {@code Document} per non-duplicate id found in the followees
 * content of {@code inItem}.
 *
 * <p>The ids come from {@code $.data[*].id} and the document bodies from
 * {@code $.data[*].[*]}; the two result lists are paired by position.
 *
 * @param inItem the file handed to {@code MemberURLTokenGenerator.readFollowees}
 * @return the documents whose id was not reported as a duplicate by
 *         {@code duplicateRemover}, or {@code null} when the content is empty
 */
@Override
public List<Document> process(File inItem) {
    String content = MemberURLTokenGenerator.readFollowees(inItem);
    if (StringUtils.isEmpty(content)) {
        return null;
    }

    List<Document> result = new ArrayList<>(20);
    Json parsed = new Json(content);
    List<String> users = parsed.jsonPath("$.data[*].[*]").all();
    List<String> ids = parsed.jsonPath("$.data[*].id").all();

    for (int idx = 0; idx < ids.size(); idx++) {
        String id = ids.get(idx);
        if (duplicateRemover.isDuplicate(id)) {
            continue;
        }
        // users is read at the same position as the id it belongs to.
        result.add(new Document(id, users.get(idx)));
    }
    return result;
}
/**
 * Wraps the member content of {@code inItem} in a single {@code Document}
 * keyed by its {@code $.id} value.
 *
 * @param inItem the file handed to {@code readMember}
 * @return {@code null} when the content is empty; otherwise a list holding the
 *         document, or an empty list when {@code duplicateRemover} reports the
 *         id as a duplicate
 */
@Override
public List<Document> process(File inItem) {
    String content = readMember(inItem);
    if (StringUtils.isEmpty(content)) {
        return null;
    }

    List<Document> result = new ArrayList<>(1);
    String memberId = new Json(content).jsonPath("$.id").get();
    boolean alreadySeen = duplicateRemover.isDuplicate(memberId);
    if (!alreadySeen) {
        result.add(new Document(memberId, content));
    }
    return result;
}
public void process(Page page) { Json json = page.getJson(); //System.out.println(json); page.putField(ZhihuPipeline.URL, page.getUrl()); page.putField(ZhihuPipeline.RESPONSE, json); String isEnd = json.jsonPath("$.paging.is_end").get(); if (!Boolean.parseBoolean(isEnd)) { page.addTargetRequest(json.jsonPath("$.paging.next").get()); } List<String> urlTokens = json.jsonPath("$.data[*].url_token").all(); List<String> urls = generateFolloweeUrls(urlTokens); page.addTargetRequests(urls); }