/**
 * Collects the textual content of every node in the given NodeList by
 * walking it with a {@code StringBean}. Non-breaking spaces are replaced
 * by normal space characters, and runs of whitespace are collapsed to a
 * single character.
 *
 * @param nodeList the nodes to extract text from
 * @return the extracted text, or {@code null} if visiting the nodes
 *         raised a {@code ParserException}
 */
public static String toText(NodeList nodeList) {
  String extracted;
  try {
    StringBean textCollector = new StringBean();
    nodeList.visitAllNodesWith(textCollector);
    extracted = textCollector.getStrings();
  } catch (ParserException e) {
    // Failure is reported to the caller as null rather than propagated.
    extracted = null;
  }
  return extracted;
}
/**
 * Reads the given classpath resource line by line, parses the resulting
 * text with the HTML parser, runs an {@code HtmlDocumentationVisitor} over
 * the parsed nodes and copies the visitor's entries into {@code map}.
 *
 * <p>Any exception raised while reading or parsing (including the one caused
 * by a resource that cannot be found) is reported through
 * {@code RutaIdeUIPlugin.error} instead of being propagated.
 *
 * @param documentationFile resource name, resolved via
 *        {@code getClass().getResourceAsStream}
 * @throws IOException declared for callers; the body itself routes all
 *         exceptions to the plugin error log
 */
private void fillMap(String documentationFile) throws IOException {
  InputStream resourceAsStream = getClass().getResourceAsStream(documentationFile);
  try {
    StringBuilder sb = new StringBuilder();
    // A null stream makes this constructor throw; the outer catch logs it,
    // exactly as before.
    BufferedReader reader = new BufferedReader(new InputStreamReader(resourceAsStream));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        // Line terminators are normalized to "\n".
        sb.append(line).append('\n');
      }
    } finally {
      // Closing the reader also closes the underlying resource stream,
      // which was previously left open.
      reader.close();
    }
    String document = sb.toString();
    Parser parser = new Parser(document);
    NodeList list = parser.parse(null);
    HtmlDocumentationVisitor visitor = new HtmlDocumentationVisitor(document);
    list.visitAllNodesWith(visitor);
    map.putAll(visitor.getMap());
  } catch (Exception e) {
    RutaIdeUIPlugin.error(e);
  }
}
}
/**
 * When {@code body} is non-null and contains the text "base64", parses it
 * as HTML, walks the nodes with an {@code HtmlImageNodeVisitor}, replaces
 * {@code body} with the node list serialized back to HTML, and hands the
 * visitor's collected base64 images to
 * {@code addAllBase64ImagesToMimeMultipart}. Otherwise does nothing.
 *
 * @param multipart the multipart passed on to
 *        {@code addAllBase64ImagesToMimeMultipart}
 */
private void processHTMLBodyWithBASE64Images(MimeMultipart multipart)
    throws ParserException, MessagingException, NoSuchAlgorithmException, SMIMEException,
        java.security.NoSuchProviderException {
  if (null == body || !body.contains("base64")) {
    return;
  }

  NodeList parsedNodes = new Parser(body).parse(null);
  HtmlImageNodeVisitor imageVisitor = new HtmlImageNodeVisitor();
  parsedNodes.visitAllNodesWith(imageVisitor);

  // NOTE(review): body is re-serialized after the visit, presumably because
  // the visitor rewrites image nodes in place - confirm against the visitor.
  body = parsedNodes.toHtml();
  addAllBase64ImagesToMimeMultipart(multipart, imageVisitor.getBase64Images());
}
// Build a converter visitor from the newline/gap/whitespace settings in scope,
// run it over every node in `list`, then keep the results it exposes:
// getTextSpans() goes into visibleSpansSoFar and getLinebreaksFromHtmlTags()
// into linebreaksFromHtmlTags.
HtmlConverterVisitor visitor = new HtmlConverterVisitor(newlineInducingTags, newlineInducingTagRegExp, gapInducingTags, gapText, skipWhitespaces, processAll); list.visitAllNodesWith(visitor); visibleSpansSoFar = visitor.getTextSpans(); linebreaksFromHtmlTags = visitor.getLinebreaksFromHtmlTags();
/**
 * Parses the CAS document text as HTML, walks the nodes with an
 * {@code HtmlVisitor}, and adds every annotation the visitor reports
 * (first {@code getAnnotations()}, then {@code getAnnotationStack()}) to
 * the CAS indexes, skipping any whose begin is not strictly before its end.
 *
 * @param jcas the CAS whose document text is parsed and whose indexes
 *        receive the annotations
 * @throws AnalysisEngineProcessException wrapping a {@code ParserException}
 *         raised while parsing or visiting
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  final String htmlSource = jcas.getDocumentText();
  final List<AnnotationFS> finished;
  final List<AnnotationFS> pending;
  try {
    NodeList nodes = new Parser(htmlSource).parse(null);
    HtmlVisitor htmlVisitor = new HtmlVisitor(jcas, onlyContent);
    nodes.visitAllNodesWith(htmlVisitor);
    finished = htmlVisitor.getAnnotations();
    pending = htmlVisitor.getAnnotationStack();
  } catch (ParserException e) {
    throw new AnalysisEngineProcessException(e);
  }

  // Only annotations covering at least one character are indexed.
  for (AnnotationFS annotation : finished) {
    boolean nonEmpty = annotation.getBegin() < annotation.getEnd();
    if (nonEmpty) {
      jcas.addFsToIndexes(annotation);
    }
  }
  for (AnnotationFS annotation : pending) {
    boolean nonEmpty = annotation.getBegin() < annotation.getEnd();
    if (nonEmpty) {
      jcas.addFsToIndexes(annotation);
    }
  }
}
/**
 * Runs the HTML parser over the document text of the given CAS and indexes
 * the annotations produced by an {@code HtmlVisitor}: the visitor's
 * annotation list is handled first, followed by its annotation stack.
 * Annotations whose begin offset is not smaller than their end offset are
 * not added.
 *
 * @param jcas the CAS providing the document text and receiving the
 *        annotations
 * @throws AnalysisEngineProcessException if parsing or visiting fails with
 *         a {@code ParserException}
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  List<AnnotationFS> collected = new ArrayList<AnnotationFS>();
  List<AnnotationFS> stacked = new ArrayList<AnnotationFS>();
  String text = jcas.getDocumentText();

  try {
    Parser htmlParser = new Parser(text);
    NodeList parsed = htmlParser.parse(null);
    HtmlVisitor walker = new HtmlVisitor(jcas, onlyContent);
    parsed.visitAllNodesWith(walker);
    collected = walker.getAnnotations();
    stacked = walker.getAnnotationStack();
  } catch (ParserException e) {
    throw new AnalysisEngineProcessException(e);
  }

  for (AnnotationFS candidate : collected) {
    if (candidate.getBegin() >= candidate.getEnd()) {
      continue; // zero-length or inverted span: not indexed
    }
    jcas.addFsToIndexes(candidate);
  }

  for (AnnotationFS candidate : stacked) {
    if (candidate.getBegin() >= candidate.getEnd()) {
      continue; // zero-length or inverted span: not indexed
    }
    jcas.addFsToIndexes(candidate);
  }
}