/**
 * Text pre-processing.
 *
 * <p>Steps, in order: lowercase the text, remove every space character,
 * drop each {@code <code>...</code>} section, then strip the remaining HTML markup.
 *
 * @param content raw text that may contain HTML; must not be {@code null}
 * @return the plain text left after the steps above
 */
private String preHandle(String content) {
    // Locale.ROOT keeps the lowercasing independent of the JVM default locale
    // (e.g. a Turkish default locale would map 'I' to a dotless 'ı').
    String lowered = content.toLowerCase(java.util.Locale.ROOT);
    // Literal replace: the space is not a regex, so no pattern needs compiling for it.
    String compact = lowered.replace(" ", "");
    String withoutCode = compact.replaceAll("<code[\\s\\S]*?</code>", "");
    return Jsoup.parse(withoutCode).text();
}
/**
 * Collects the image URLs of the gallery described by the page text.
 *
 * <p>The page text is expected to be a JSON array once the
 * {@code var galleryinfo =} assignment prefix is removed; the {@code name}
 * of every array element is appended to the gallery base URL.
 *
 * @param doc document whose text holds the gallery info
 * @return one URL per entry of the JSON array, in array order
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> imageUrls = new ArrayList<>();
    String galleryJson = doc.text().replaceAll("var galleryinfo =", "");
    JSONArray entries = new JSONArray(galleryJson);
    int idx = 0;
    while (idx < entries.length()) {
        String fileName = entries.getJSONObject(idx).getString("name");
        imageUrls.add("https://ba.hitomi.la/galleries/" + galleryId + "/" + fileName);
        idx++;
    }
    return imageUrls;
}
/**
 * Extracts the plain text of an HTML string.
 *
 * @param html HTML markup; a value that {@code StrUtils.isBlank} reports as blank
 *             is returned unchanged
 * @return the text content of the parsed HTML, or {@code html} itself when blank
 */
public static String getText(String html) {
    return StrUtils.isBlank(html) ? html : Jsoup.parse(html).text();
}
/**
 * Requests {@code openIdUri + accessToken} and pulls the {@code openid} value
 * out of the response text.
 *
 * @param accessToken token appended to {@code openIdUri}
 * @return the first {@code "openid":"..."} value found in the response,
 *         or {@code null} when the response contains none
 * @throws IOException if the HTTP request fails
 */
private String getOpenId(String accessToken) throws IOException {
    String responseText = Jsoup.connect(openIdUri + accessToken).get().text();
    Matcher openIdMatcher = Pattern.compile("\"openid\":\"(.*?)\"").matcher(responseText);
    return openIdMatcher.find() ? openIdMatcher.group(1) : null;
}
/** * Article API * @param URL * @param JSONObject genericScraperData * @return genericScraperData */ public JSONObject articleAPI (String url, JSONObject genericScraperData) throws MalformedURLException{ URL qurl = new URL(url); String data = ""; try { data = null;// ArticleExtractor.INSTANCE.getText(qurl); genericScraperData.put("query", qurl); genericScraperData.put("data", data); genericScraperData.put("NLP", "true"); } catch (Exception e) { if ("".equals(data)) { try { Document htmlPage = Jsoup.connect(url).get(); data = htmlPage.text(); genericScraperData.put("query", qurl); genericScraperData.put("data", data); genericScraperData.put("NLP", "false"); } catch (Exception ex) {} } } return genericScraperData; }
/**
 * Requests the access token endpoint and converts the response into a {@link GithubToken}.
 *
 * <p>The response text is split on {@code &}; exactly three parts are expected,
 * in the order {@code access_token=}, {@code scope=}, {@code token_type=}.
 *
 * @param code  value sent as the {@code code} query parameter
 * @param state value sent as the {@code state} query parameter
 * @return the populated token, or {@code null} when the response does not
 *         consist of exactly three {@code &}-separated parts
 * @throws IOException declared by the method signature
 */
private GithubToken getToken(String code, String state) throws IOException {
    String requestUrl = accessTokenUri
            + "?client_id=" + clientId
            + "&code=" + code
            + "&client_secret=" + clientSecret
            + "&redirect_uri=" + redirectUri
            + "&state=" + state;
    String[] parts = JsoupUtils.getDocWithPC(requestUrl).text().split("&");
    if (parts.length != 3) {
        return null;
    }

    GithubToken token = new GithubToken();
    token.setAccessToken(parts[0].replace("access_token=", ""));
    token.setScope(parts[1].replace("scope=", ""));
    token.setTokenType(parts[2].replace("token_type=", ""));
    return token;
}
/**
 * Fetches one page of the API response for the current photoset and parses it as JSON.
 *
 * <p>The API is requested exactly once; the same response text is logged and parsed.
 *
 * @param page   page value passed to {@code apiURLBuilder}
 * @param apiKey API key passed to {@code apiURLBuilder}
 * @return the parsed response, or {@code null} when the API URL is malformed
 *         or the request fails
 */
private JSONObject getJSON(String page, String apiKey) {
    String apiURL = null;
    URL pageURL;
    try {
        apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey);
        pageURL = new URL(apiURL);
    } catch (MalformedURLException e) {
        LOGGER.error("Unable to get api link " + apiURL + " is malformed");
        // Without a valid URL there is nothing to request.
        return null;
    }
    try {
        // Single request: previously the endpoint was hit once for logging and again for parsing.
        String response = Http.url(pageURL).ignoreContentType().get().text();
        LOGGER.info(response);
        return new JSONObject(response);
    } catch (IOException e) {
        LOGGER.error("Unable to fetch api link " + apiURL, e);
        return null;
    }
}
/**
 * Requests {@code tokenAccessApi} and converts the response into a {@link QQToken}.
 *
 * <p>The response text is split on {@code &}; exactly three parts are expected,
 * in the order {@code access_token=}, {@code expires_in=}, {@code refresh_token=}.
 *
 * @param tokenAccessApi full URL of the token endpoint
 * @return the populated token, or {@code null} when the response does not
 *         consist of exactly three {@code &}-separated parts
 * @throws IOException if the HTTP request fails
 * @throws NumberFormatException if the {@code expires_in} part is not an integer
 */
private QQToken getToken(String tokenAccessApi) throws IOException {
    String[] parts = Jsoup.connect(tokenAccessApi).get().text().split("&");
    if (parts.length != 3) {
        return null;
    }

    QQToken token = new QQToken();
    token.setAccessToken(parts[0].replace("access_token=", ""));
    token.setExpiresIn(Integer.parseInt(parts[1].replace("expires_in=", "")));
    token.setRefresh_token(parts[2].replace("refresh_token=", ""));
    return token;
}
desc = Jsoup.parse(description).text(); } else { desc = description;
private User getUserInfo(String accessToken, String openId) { String url = String.format(USER_INFO_API, userInfoUri, accessToken, clientId, openId); Document document = JsoupUtils.getDocWithPC(url); String resultText = document.text(); JSONObject json = JSON.parseObject(resultText); // 解析 QQ 用户信息 User user = new User(); user.setOpenId(openId); user.setNickname(json.getString("nickname")); user.setGender(json.getString("gender")); user.setAvatar(json.getString("figureurl_qq_2")); String meta = json.getString("year") + " " + json.getString("province"); user.setMeta(meta); user.setLoginType(LoginTypeEnum.QQ.name()); String md5 = DigestUtils.md5DigestAsHex(resultText.getBytes()); user.setMd5(md5); // 更新 QQ 用户在本站的信息 return userService.updateUserInfo(user); }
private User getUserInfo(String accessToken) { Document document = JsoupUtils.getDocWithPC(userInfoUri + accessToken); String resultText = document.text(); JSONObject json = JSON.parseObject(resultText); // 解析 Github 用户信息 User user = new User(); user.setOpenId(json.getString("id")); user.setNickname(json.getString("login")); user.setGender("男"); user.setAvatar(json.getString("avatar_url")); String location = json.getString("location"); String name = json.getString("name"); String company = json.getString("company"); String meta = (location == null ? "" : location) + " " + (name == null ? "" : name) + " " + (company == null ? "" : company); user.setMeta(meta); user.setLoginType(LoginTypeEnum.GIT.name()); String md5 = DigestUtils.md5DigestAsHex(resultText.getBytes()); user.setMd5(md5); // 更新 Github 用户在本站的信息 return userService.updateUserInfo(user); }
/**
 * Gets the playback key of a video segment.
 *
 * <p>Requests {@code KEY_API}, removes the {@code QZOutputJson=} wrapper and the
 * trailing {@code ;} from the response text, and reads the {@code key} field of
 * the remaining JSON.
 *
 * @param vid      video id sent as the {@code vid} parameter
 * @param filename segment file name sent as the {@code filename} parameter
 * @param format   format value sent as the {@code format} parameter
 * @return the {@code key} field of the response
 * @throws AnyException if the HTTP request fails
 */
private String videoKey(String vid, String filename, String format) {
    try {
        Document document = Jsoup.connect(KEY_API).header("Cookie", COOKIE)
                .data("vid", vid).data("platform", PLATFORM)
                .data("otype", "json")
                .data("filename", filename).data("sdtfrom", SDTFROM)
                .data("format", format).data("guid", GUID)
                .ignoreContentType(true).get();
        String result = document.text().replace("QZOutputJson=", "");
        // Strip only a real trailing ';' so a payload without it is not truncated.
        // The payload is deliberately not printed: it carries the playback key.
        if (result.endsWith(";")) {
            result = result.substring(0, result.length() - 1);
        }
        return JSONObject.parseObject(result).getString("key");
    } catch (IOException e) {
        log.info("request tencent video part api error, vid : " + vid);
        throw new AnyException("request tencent api error, vid : " + vid);
    }
}
}
// Builds the route URL from ROUTE, vid and getTkey(), loads it through JsoupUtils.getDocWithPC,
// parses the response text as JSON and reads the title from msgs -> playurl.
// Separately, the text of `result` (declared outside this snippet) is Java-unescaped and every
// occurrence of "vjs_149067353337651(" and ");" is removed from it.
// NOTE(review): presumably this unwraps a JSONP callback around a JSON payload - confirm against the caller.
String routeUrl = String.format(ROUTE, vid, getTkey()); Document document = JsoupUtils.getDocWithPC(routeUrl); JSONObject object = JSONObject.parseObject(document.text()); JSONObject playurl = object.getJSONObject("msgs").getJSONObject("playurl"); String title = playurl.getString("title"); String text = StringEscapeUtils.unescapeJava(result.text()); text = text.replace("vjs_149067353337651(", ""); text = text.replace(");", "");
/**
 * Calls the Tencent API to get the video information.
 *
 * <p>Requests {@code VIDEO_API} and returns the response text with the
 * {@code QZOutputJson=} wrapper and the trailing {@code ;} removed.
 *
 * @param vid video id sent as the {@code vids} parameter
 * @return the unwrapped response text
 * @throws AnyException if the HTTP request fails
 */
private String videoInfo(String vid) {
    try {
        Document document = Jsoup.connect(VIDEO_API).header("Cookie", COOKIE)
                .data("vids", vid).data("platform", PLATFORM)
                .data("sdtfrom", SDTFROM)
                .data("format", "10209")
                .data("otype", "json").data("defn", "fhd")
                .data("defaultfmt", "fhd").data("guid", GUID)
                .ignoreContentType(true).get();
        String result = document.text().replace("QZOutputJson=", "");
        // Strip only a real trailing ';': an empty body no longer throws and a
        // payload without the terminator is not truncated.
        if (result.endsWith(";")) {
            result = result.substring(0, result.length() - 1);
        }
        return result;
    } catch (IOException e) {
        log.info("request tencent api error, vid : " + vid);
        throw new AnyException("request tencent api error, vid : " + vid);
    }
}
// Parse tmp as HTML and keep only its text content.
tmp = Jsoup.parse(tmp).text();
String value = atts.getValue(key); if (HTML_DETECTION_PATTERN.matcher(value).find()) { value = Jsoup.parse(value).text();
private String capitalizeFirstLetter(String choice) { if (choice == null || choice.isEmpty()) { return choice; } choice = Jsoup.parse(choice).text(); // decode HTML entities and strip tags return choice.substring(0, 1).toUpperCase(Locale.ENGLISH) + choice.substring(1); }
@Override public void paint(Graphics g) { // Fix #4012: ArrayIndexOutOfBoundsException on paint() // Fix #38546: ConcurrentModificationException on getText() // Fix #37872: IndexOutOfBoundsException on getText() // Fix #48915: ClassCastException on paint() try { super.paint(g); if ("".equals(Jsoup.parse(this.getText()).text().trim())) { this.drawPlaceholder(this, g, this.placeholderText); } } catch (ConcurrentModificationException | IndexOutOfBoundsException | ClassCastException e) { LOGGER.error(e.getMessage(), e); } }
/**
 * Converts HTML markup to its plain text content.
 *
 * @param html HTML markup, may be {@code null}
 * @return the text content of the parsed HTML, or {@code null} when {@code html} is {@code null}
 */
public static String html2text(String html) {
    return html == null ? null : Jsoup.parse(html).text();
}
/**
 * Checks that {@code SendgridService.parseToEmail} carries over sender, recipient,
 * BCC, reply-to and subject, and produces a {@code text/plain} part holding the
 * HTML-stripped content followed by a {@code text/html} part holding the original content.
 */
@Test
public void testConvertToSendgrid() {
    EmailWrapper source = getTypicalEmailWrapper();
    SendgridService service = new SendgridService();
    Mail converted = service.parseToEmail(source);

    // Sender
    assertEquals(source.getSenderEmail(), converted.getFrom().getEmail());
    assertEquals(source.getSenderName(), converted.getFrom().getName());

    // Recipients and reply-to
    assertEquals(source.getRecipient(),
            converted.personalization.get(0).getTos().get(0).getEmail());
    assertEquals(source.getBcc(),
            converted.personalization.get(0).getBccs().get(0).getEmail());
    assertEquals(source.getReplyTo(), converted.getReplyto().getEmail());

    // Subject
    assertEquals(source.getSubject(), converted.getSubject());

    // Plain-text part comes first and holds the HTML-stripped content
    assertEquals("text/plain", converted.getContent().get(0).getType());
    assertEquals(Jsoup.parse(source.getContent()).text(),
            converted.getContent().get(0).getValue());

    // HTML part comes second and holds the content unchanged
    assertEquals("text/html", converted.getContent().get(1).getType());
    assertEquals(source.getContent(), converted.getContent().get(1).getValue());
}