1 | | - | package io.onedev.commons.utils; |
2 | | - | |
3 | | - | import java.util.Collection; |
4 | | - | import java.util.regex.Matcher; |
5 | | - | |
6 | | - | import org.jsoup.Jsoup; |
7 | | - | import org.jsoup.nodes.Attribute; |
8 | | - | import org.jsoup.nodes.Document; |
9 | | - | import org.jsoup.nodes.Element; |
10 | | - | import org.jsoup.nodes.Node; |
11 | | - | import org.jsoup.nodes.TextNode; |
12 | | - | import org.jsoup.safety.Cleaner; |
13 | | - | import org.jsoup.safety.Whitelist; |
14 | | - | import org.unbescape.html.HtmlEscape; |
15 | | - | |
16 | | - | import com.google.common.collect.Lists; |
17 | | - | |
18 | | - | public class HtmlUtils { |
19 | | - | |
20 | | - | private static final String[] SAFE_TAGS = new String[] { "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", |
21 | | - | "i", "strong", "em", "a", "pre", "code", "img", "tt", "div", "ins", "del", "sup", "sub", "p", "ol", "ul", |
22 | | - | "li", "table", "thead", "tbody", "tfoot", "th", "tr", "td", "rt", "rp", "blockquote", "dl", "dt", "dd", |
23 | | - | "kbd", "q", "hr", "strike", "caption", "cite", "col", "colgroup", "small", "span", "u", "input", "video", "source"}; |
24 | | - | |
25 | | - | private static final String[] SAFE_ATTRIBUTES = new String[] { "abbr", "accept", "accept-charset", "accesskey", |
26 | | - | "action", "align", "alt", "axis", "border", "cellpadding", "cellspacing", "char", "charoff", "charset", |
27 | | - | "checked", "cite", "clear", "cols", "colspan", "color", "compact", "coords", "datetime", "details", "dir", |
28 | | - | "disabled", "enctype", "for", "frame", "headers", "height", "hreflang", "hspace", "ismap", "label", "lang", |
29 | | - | "longdesc", "maxlength", "media", "method", "multiple", "name", "nohref", "noshade", "nowrap", "prompt", |
30 | | - | "readonly", "rel", "rev", "rows", "rowspan", "rules", "scope", "selected", "shape", "size", "span", "start", |
31 | | - | "style", "summary", "tabindex", "target", "title", "type", "usemap", "valign", "value", "vspace", "width", |
32 | | - | "itemprop", "class", "controls", "id"}; |
33 | | - | |
34 | | - | private static final String[] SAFE_ANCHOR_SCHEMES = new String[] { "http", "https", "mailto", }; |
35 | | - | |
36 | | - | private static final Whitelist whiteList; |
37 | | - | |
38 | | - | static { |
39 | | - | whiteList = new Whitelist() { |
40 | | - | |
41 | | - | @Override |
42 | | - | protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { |
43 | | - | if (attr.getKey().startsWith("data-")) |
44 | | - | return true; |
45 | | - | else |
46 | | - | return super.isSafeAttribute(tagName, el, attr); |
47 | | - | } |
48 | | - | |
49 | | - | }; |
50 | | - | |
51 | | - | whiteList.addTags(SAFE_TAGS) |
52 | | - | .addAttributes("a", "href", "title") |
53 | | - | .addAttributes("img", "align", "alt", "height", "src", "title", "width") |
54 | | - | .addAttributes("div", "itemscope", "itemtype") |
55 | | - | .addAttributes("source", "src") |
56 | | - | .addAttributes(":all", SAFE_ATTRIBUTES) |
57 | | - | .addProtocols("a", "href", SAFE_ANCHOR_SCHEMES) |
58 | | - | .addProtocols("blockquote", "cite", "http", "https") |
59 | | - | .addProtocols("cite", "cite", "http", "https") |
60 | | - | .addProtocols("img", "src", "http", "https") |
61 | | - | .addProtocols("q", "cite", "http", "https") |
62 | | - | .preserveRelativeLinks(true); |
63 | | - | } |
64 | | - | |
65 | | - | public static boolean hasAncestor(Node node, Collection<String> tags) { |
66 | | - | Node parent = node.parentNode(); |
67 | | - | while (parent != null) { |
68 | | - | if (parent instanceof Element) { |
69 | | - | Element e = (Element) parent; |
70 | | - | if (tags.contains(e.tagName().toLowerCase())) { |
71 | | - | return true; |
72 | | - | } |
73 | | - | } |
74 | | - | |
75 | | - | parent = parent.parentNode(); |
76 | | - | } |
77 | | - | |
78 | | - | return false; |
79 | | - | } |
80 | | - | |
81 | | - | public static boolean hasAncestor(Node node, String tag) { |
82 | | - | return hasAncestor(node, Lists.newArrayList(tag)); |
83 | | - | } |
84 | | - | |
85 | | - | public static void appendReplacement(Matcher matcher, Node node, String replacement) { |
86 | | - | StringBuffer buffer = new StringBuffer(); |
87 | | - | matcher.appendReplacement(buffer, ""); |
88 | | - | if (buffer.length() != 0) |
89 | | - | node.before(new TextNode(buffer.toString(), node.baseUri())); |
90 | | - | node.before(replacement); |
91 | | - | } |
92 | | - | |
93 | | - | public static void appendTail(Matcher matcher, Node node) { |
94 | | - | StringBuffer buffer = new StringBuffer(); |
95 | | - | matcher.appendTail(buffer); |
96 | | - | if (buffer.length() != 0) |
97 | | - | node.before(new TextNode(buffer.toString(), node.baseUri())); |
98 | | - | node.remove(); |
99 | | - | } |
100 | | - | |
101 | | - | public static Document sanitize(Document doc) { |
102 | | - | return new Cleaner(whiteList).clean(doc); |
103 | | - | } |
104 | | - | |
105 | | - | public static Document parse(String html) { |
106 | | - | // Use a faked baseURI, otherwise all relative urls will be stripped out |
107 | | - | return Jsoup.parseBodyFragment(html, "http://localhost/sanitize"); |
108 | | - | } |
109 | | - | |
110 | | - | public static String escape(String text) { |
111 | | - | String escapedText = ""; |
112 | | - | for (int i=0; i<text.length(); i++) { |
113 | | - | char ch = text.charAt(i); |
114 | | - | if (ch == ' ' || ch == '\t' || !Character.isWhitespace(ch)) |
115 | | - | escapedText += ch; |
116 | | - | } |
117 | | - | return HtmlEscape.escapeHtml5(escapedText); |
118 | | - | } |
119 | | - | |
120 | | - | public static String formatAsHtml(String text) { |
121 | | - | text = HtmlEscape.escapeHtml5(text); |
122 | | - | text = StringUtils.replace(text, "\n", "<br>"); |
123 | | - | text = StringUtils.replace(text, " ", " "); |
124 | | - | return text; |
125 | | - | } |
126 | | - | } |
127 | | - | |