public static String removeSpans(String html) { Source source = new Source(html); source.fullSequentialParse(); OutputDocument outputDocument = new OutputDocument(source); List<Tag> tags = source.getAllTags(); for (Tag tag : tags) { String tagname = tag.getName().toLowerCase(); if (tagname.equals("span")) { //remove the <span> outputDocument.remove(tag); } } return outputDocument.toString(); }
/**
 * Replaces each form control held in {@code formControls} in the given output document.
 *
 * @param outputDocument the output document that receives one {@code replace} call per form control.
 */
void replaceInOutputDocument(final OutputDocument outputDocument) {
    for (final FormControl control : formControls) {
        outputDocument.replace(control);
    }
}
} // presumably closes the enclosing type, whose header lies outside this excerpt
/** * Register mandatory attributes for Scoped CSS */ private void registerMandatoryAttributes(Attributes attributes) { // see https://sourceforge.net/p/jerichohtml/discussion/350024/thread/501a7d05/ Map<String, String> attrs = context.getMandatoryAttributes(); if (!attrs.isEmpty()) { for (Entry<String, String> i : attrs.entrySet()) { String v = i.getValue(); if (v == null) { outputDocument.insert(attributes.getBegin(), " " + i.getKey() + " "); } else { outputDocument.insert(attributes.getBegin(), " " + i.getKey() + "\"" + v + "\" "); } } } }
/**
 * Parses {@code value}, lets {@code replaceChildren} record its changes in an
 * output document built over the parsed source, and returns the resulting text.
 *
 * @param key not read by this method.
 * @param value the text to parse and filter.
 * @return the text of the output document after {@code replaceChildren} has run.
 */
public String filter(String key, String value) {
    final Source parsed = new Source(value);
    final OutputDocument rewritten = new OutputDocument(parsed);
    // The parsed source is passed twice, exactly as the helper is invoked for the root.
    replaceChildren(parsed, parsed, rewritten);
    return rewritten.toString();
}
/**
 * Marks the given range of the source text as ignored for parsing purposes.
 * <p>
 * See the documentation of the {@link Segment#ignoreWhenParsing()} method for more information.
 *
 * @param begin  the beginning character position in the source text.
 * @param end  the end character position in the source text.
 * @throws IllegalStateException if a full sequential parse has already been performed.
 */
public void ignoreWhenParsing(final int begin, final int end) {
    if (wasFullSequentialParseCalled()) {
        throw new IllegalStateException("ignoreWhenParsing can not be used after a full sequential parse has been performed");
    }
    final boolean firstIgnoredRange = (parseTextOutputDocument == null);
    if (firstIgnoredRange) {
        // Build the working document from the current parse text BEFORE clearing
        // the parseText field; the order of these two statements matters.
        parseTextOutputDocument = new OutputDocument(getParseText());
        parseText = null;
    }
    parseTextOutputDocument.replaceWithSpaces(begin, end);
}
// Excerpt of removeNotAllowedTags: builds a Source and an OutputDocument from htmlFragment,
// fetches all elements, removes elements / start tags / end tags / content from the output
// document, and finally strips newline and tab characters from the resulting string.
// NOTE(review): this excerpt is elided. The loop over `elements`, the declarations of `element`,
// `attrs` and `content`, the condition preceding the dangling `||`, and the closing braces are not visible here.
// NOTE(review): `!element.getName().contains("a")` only fires for names that do not contain the
// letter 'a' at all, so names such as "table" or "span" are treated like "a" - confirm whether equals("a") was intended.
// NOTE(review): `element.getName() == "style"` / `== "form"` compare references, not contents; this
// is only correct if getName() returns interned strings - confirm, or use equals().
private String removeNotAllowedTags(String htmlFragment, URI docUri) { Source source = new Source(htmlFragment); OutputDocument outputDocument = new OutputDocument(source); List<Element> elements = source.getAllElements(); Map<String, String> attrsUpdate = outputDocument.replace(attrs, true); if (!element.getName().contains("a")) { attrsUpdate.clear(); outputDocument.remove(element); || element.getName() == "style" || element.getName() == "form") { outputDocument.remove(content); outputDocument.remove(element.getStartTag()); outputDocument.remove(element.getEndTag()); String out = outputDocument.toString(); out = out.replaceAll("\\n", ""); out = out.replaceAll("\\t", "");
// Fragment of a text-processing routine: an OutputDocument is created over `source`; in the
// visible else-branch a segment is replaced by doProcessText(segment.toString()); the output
// document's text is returned.
// NOTE(review): this excerpt is elided. The `if` that pairs with `} else {`, the loop that
// supplies `segment`, and whatever is done with `characterReference` are not visible here.
OutputDocument outputDocument = new OutputDocument(source); CharacterReference characterReference = (CharacterReference) segment; } else { outputDocument.replace(segment, doProcessText(segment.toString())); return outputDocument.toString();
/**
 * Applies this control's output style to the given output document.
 * <ul>
 * <li>REMOVE: the control's element is removed.
 * <li>DISPLAY_VALUE: the element is replaced with getDisplayValueHTML(getValue(),true).
 * <li>otherwise: replaceAttributesInOutputDocumentIfModified is called, and if value is not the
 *     UNCHANGED instance (reference comparison, presumably a sentinel - confirm) the element's
 *     content is replaced with CharacterReference.encode(value,false).
 * </ul>
 * NOTE(review): this line ends with the opening of getValue(), whose body is outside this excerpt.
 *
 * @param outputDocument the output document to modify.
 */
void replaceInOutputDocument(final OutputDocument outputDocument) { if (outputStyle==FormControlOutputStyle.REMOVE) { outputDocument.remove(getElement()); } else if (outputStyle==FormControlOutputStyle.DISPLAY_VALUE) { outputDocument.replace(getElement(),getDisplayValueHTML(getValue(),true)); } else { replaceAttributesInOutputDocumentIfModified(outputDocument); if (value!=UNCHANGED) outputDocument.replace(getElement().getContent(),CharacterReference.encode(value,false)); } } private String getValue() {
// Fragment of an attribute-to-directive rewrite: directive text is inserted before and after the
// element, and the controlling attribute is removed from, or replaced in, the output document.
// NOTE(review): this excerpt is elided and merges statements from several branches (`buf` appears
// both as an appendable buffer and as a String); the enclosing method and braces are not visible here.
buf.append(")"); document.insert(element.getBegin(), buf.toString()); // insert the block directive document.remove(new Segment(source, attribute.getBegin() - 1, attribute.getEnd())); // remove the attribute if (oriattr != null) { String buf = String.format("#if(%s)%s=\"%s\"#end()", expression, oriattr.getName(), oriattr.getValue()); document.replace(new Segment(source, oriattr.getBegin(), oriattr.getEnd()), buf); document.remove(new Segment(source, attribute.getBegin(), attribute.getEnd())); // remove the ifattr control attribute document.replace(new Segment(source, attribute.getBegin(), attribute.getEnd()), buf); document.remove(new Segment(source, attribute.getBegin(), attribute.getEnd())); // remove the setattr control attribute document.insert(element.getEnd(), buf.toString()); // insert the end directive
// Fragment: creates an OutputDocument over `source`, iterates the elements with class
// "constantValuesContainer", replaces `childContent` with the character for `value`, writes the
// output document to `file`, and prints a completion message.
// NOTE(review): this excerpt is elided; the declarations of `childContent` and `value` and the
// enclosing method are not visible here.
// NOTE(review): the OutputStreamWriter is created without an explicit charset and no close() is
// visible in this excerpt - confirm the stream is flushed/closed and the encoding is intended.
OutputDocument outputDocument=new OutputDocument(source); for (Element element : source.getAllElementsByClass("constantValuesContainer")) outputDocument.replace(childContent, String.valueOf((char)value)); outputDocument.writeTo( new OutputStreamWriter(new FileOutputStream(file.toFile()))); System.out.println("Processing "+file+" DONE");
/**
 * Rewrites a single tag in the output document if it is allowed.
 * <p>
 * A tag whose lower-cased name is not in {@code allowedTags} is rejected.
 * A normal start tag is replaced with {@code getStartTagHTML(...)}; if its element
 * requires an end tag but has none it is rejected, and if its end tag is optional and
 * missing, {@code getEndTagHTML(...)} is inserted at the end of the element.
 * A normal end tag is replaced with {@code getEndTagHTML(...)} unless it has no element.
 * Any other tag type is rejected.
 *
 * @param tag the tag to process.
 * @param outputDocument the output document receiving the replacements and insertions.
 * @return {@code true} if the tag was rewritten, {@code false} if it was rejected.
 */
private boolean processTag(Tag tag, OutputDocument outputDocument) {
    final String name = tag.getName().toLowerCase();
    if (!allowedTags.contains(name)) {
        return false;
    }

    // End tags: keep only those that belong to an element.
    if (tag.getTagType() == EndTagType.NORMAL) {
        if (tag.getElement() == null) {
            return false;
        }
        outputDocument.replace(tag, getEndTagHTML(name));
        return true;
    }

    // Everything that is neither a normal end tag nor a normal start tag is rejected.
    if (tag.getTagType() != StartTagType.NORMAL) {
        return false;
    }

    final Element owner = tag.getElement();
    final boolean endTagRequired = HTMLElements.getEndTagRequiredElementNames().contains(name);
    if (endTagRequired) {
        if (owner.getEndTag() == null) {
            return false;
        }
    } else {
        final boolean endTagOptional = HTMLElements.getEndTagOptionalElementNames().contains(name);
        if (endTagOptional && owner.getEndTag() == null) {
            // Supply the omitted optional end tag at the end of the element.
            outputDocument.insert(owner.getEnd(), getEndTagHTML(name));
        }
    }
    outputDocument.replace(tag, getStartTagHTML(owner.getStartTag()));
    return true;
}
/**
 * Creates a new output document over {@code fSource}.
 *
 * @return a newly constructed {@code OutputDocument}; a new instance is created on every call.
 */
public OutputDocument getOutputDocument() {
    final OutputDocument fresh = new OutputDocument(fSource);
    return fresh;
}
/**
 * Removes from this output document every segment contained in the specified collection.
 * <p>
 * Calling this method has the same effect as calling {@link #remove(Segment)} once for
 * each element of the collection, in the collection's iteration order.
 *
 * @param segments  the source {@link Segment} objects to remove.
 */
public void remove(final Collection<? extends Segment> segments) {
    for (final Segment each : segments) {
        remove(each);
    }
}
/**
 * Returns the {@linkplain ParseText parse text} of this source document.
 * <p>
 * This method is normally only of interest to users who wish to create <a href="TagType.html#Custom">custom tag types</a>.
 * <p>
 * The parse text is defined as the entire text of the source document in lower case, with all
 * {@linkplain Segment#ignoreWhenParsing() ignored} segments replaced by space characters.
 * <p>
 * The value is created on first use and kept in the {@code parseText} field. If a
 * {@code parseTextOutputDocument} is pending, the parse text is built from its output and
 * that document is then discarded; otherwise it is built from {@code sourceText}.
 *
 * @return the {@linkplain ParseText parse text} of this source document.
 */
public final ParseText getParseText() {
    if (parseText != null) {
        return parseText;
    }
    if (parseTextOutputDocument == null) {
        parseText = new CharSequenceParseText(sourceText);
    } else {
        parseText = new CharSequenceParseText(parseTextOutputDocument.toString());
        parseTextOutputDocument = null;
    }
    return parseText;
}
// Fragment of a formula-tag rewrite: parses `source` with logging disabled, collects the start
// tags named FORMULA_TAG_NAME, replaces the TeX element's start/end tags with
// TEX_SCRIPT_TAG_START / TEX_SCRIPT_TAG_END, replaces its content either with an empty string
// (when the right bound from getBounds is 0) or with the HTML-unescaped substring between the
// bounds, removes `tag` and `endTag`, and returns the output document's text.
// NOTE(review): this excerpt is elided. The condition guarding `continue;`, the declarations of
// `texElement` and `endTag`, and several closing braces are not visible here.
Source sourceHtml = new Source(source); sourceHtml.setLogger(null); OutputDocument outputDocument = new OutputDocument(sourceHtml); List<StartTag> tags = sourceHtml.getAllStartTags(FORMULA_TAG_NAME); for (StartTag tag : tags) { continue; outputDocument.replace(texElement.getStartTag(), TEX_SCRIPT_TAG_START); String content = texElement.getContent().toString().trim(); Pair<Integer, Integer> bounds = getBounds(content); if(bounds.getRight() == 0){ logger.info("Empty source in Tex tag"); outputDocument.replace(texElement.getContent(), StringUtils.EMPTY); } else { String strippedContent = content.substring(bounds.getLeft(), bounds.getRight()); String unescapedContent = StringEscapeUtils.unescapeHtml4(strippedContent); outputDocument.replace(texElement.getContent(), unescapedContent); outputDocument.replace(texElement.getEndTag(), TEX_SCRIPT_TAG_END); outputDocument.remove(tag); outputDocument.remove(endTag); return outputDocument.toString();
/**
 * Filters the given text: the text is parsed, {@code replaceChildren} is applied to the
 * parsed source with an output document built over it, and that document's text is returned.
 *
 * @param key not read by this method.
 * @param value the text to parse and filter.
 * @return the text of the output document after {@code replaceChildren} has run.
 */
public String filter(String key, String value) {
    final Source root = new Source(value);
    final OutputDocument output = new OutputDocument(root);
    // The root source is passed for both of the helper's first two arguments.
    replaceChildren(root, root, output);
    final String filtered = output.toString();
    return filtered;
}
/**
 * Applies this control's output style to the given output document.
 * <ul>
 * <li>REMOVE: the control's element is removed.
 * <li>DISPLAY_VALUE: the element is replaced with the display-value HTML of its
 *     {@code value} attribute. For a PASSWORD control with a non-null value, the value is
 *     first replaced by {@code getString(PasswordChar, length)}. For a HIDDEN control the
 *     replacement text is {@code null}.
 * <li>otherwise: {@code replaceAttributesInOutputDocumentIfModified} is called.
 * </ul>
 *
 * @param outputDocument the output document to modify.
 */
void replaceInOutputDocument(final OutputDocument outputDocument) {
    if (outputStyle == FormControlOutputStyle.REMOVE) {
        outputDocument.remove(getElement());
        return;
    }
    if (outputStyle != FormControlOutputStyle.DISPLAY_VALUE) {
        replaceAttributesInOutputDocumentIfModified(outputDocument);
        return;
    }

    String replacement = null;
    if (formControlType != FormControlType.HIDDEN) {
        String displayed = elementContainer.getAttributeValue(Attribute.VALUE);
        final boolean maskPassword = formControlType == FormControlType.PASSWORD && displayed != null;
        if (maskPassword) {
            displayed = getString(FormControlOutputStyle.ConfigDisplayValue.PasswordChar, displayed.length());
        }
        replacement = getDisplayValueHTML(displayed, false);
    }
    outputDocument.replace(getElement(), replacement);
}
} // presumably closes the enclosing type, whose header lies outside this excerpt
// Fragment of an attribute-to-directive rewrite: directive text is inserted before and after the
// element, and the controlling attribute is removed from, or replaced in, the output document.
// NOTE(review): this excerpt is elided and merges statements from several branches (`buf` appears
// both as an appendable buffer and as a String); the enclosing method and braces are not visible here.
buf.append(")"); document.insert(element.getBegin(), buf.toString()); // insert the block directive document.remove(new Segment(source, attribute.getBegin() - 1, attribute.getEnd())); // remove the attribute if (oriattr != null) { String buf = String.format("#if(%s)%s=\"%s\"#end()", expression, oriattr.getName(), oriattr.getValue()); document.replace(new Segment(source, oriattr.getBegin(), oriattr.getEnd()), buf); document.remove(new Segment(source, attribute.getBegin(), attribute.getEnd())); // remove the ifattr control attribute document.replace(new Segment(source, attribute.getBegin(), attribute.getEnd()), buf); document.remove(new Segment(source, attribute.getBegin(), attribute.getEnd())); // remove the setattr control attribute document.insert(element.getEnd(), buf.toString()); // insert the end directive
// Setup fragment: creates an OutputDocument over `source`, fetches all of the source's tags, and
// initialises the integer `pos` to 0.
// NOTE(review): the code that consumes `doc`, `tags` and `pos` is outside this excerpt.
OutputDocument doc = new OutputDocument(source); List<Tag> tags = source.getAllTags(); int pos = 0;
/**
 * Constructs a new output document based on the specified {@link Segment}.
 * <p>
 * The document is backed by the segment's whole source; any source text before the
 * segment's begin position or after its end position is registered for removal.
 *
 * @param segment  the original {@link Segment}.
 * @throws IllegalArgumentException if {@code segment} is {@code null}.
 */
public OutputDocument(final Segment segment) {
    if (segment == null) {
        throw new IllegalArgumentException("segment argument must not be null");
    }
    final Source owner = segment.source;
    this.segment = segment;
    this.sourceText = owner;

    final boolean hasLeadingText = segment.begin > 0;
    if (hasLeadingText) {
        remove(0, segment.begin);
    }
    final boolean hasTrailingText = segment.end < owner.end;
    if (hasTrailingText) {
        remove(segment.end, owner.end);
    }
}