Commit 68e1bb96 authored by Ate Douma

CMS-11046 [Backport 11.2] Improved cleaning

parent 8c618a65
/*
* Copyright 2014-2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2014-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.hippoecm.frontend.plugin.IPluginContext;
import org.hippoecm.frontend.plugin.Plugin;
......@@ -52,6 +53,8 @@ public class HtmlCleanerPlugin extends Plugin implements IHtmlCleanerService {
// Configuration key names. NOTE(review): their use is outside this view — presumably plugin config parameters; confirm.
private static final String OMIT_COMMENTS = "omitComments";
private static final String FILTER = "filter";
// URI scheme prefixes that attribute values are checked against while filtering.
private static final String JAVASCRIPT_PROTOCOL = "javascript:";
private static final String DATA_PROTOCOL = "data:";
// Matches a single carriage return, line feed or tab character; cleanCRLFTAB uses it to strip them.
private static final Pattern CRLFTAB = Pattern.compile("[\r\n\t]");
// Shared serializer whose escapeText is applied to attribute values before the protocol checks.
private static final HippoCompactHtmlSerializer escaper = new HippoCompactHtmlSerializer(new CleanerProperties());
// Element entries by String key. NOTE(review): presumably keyed by tag name — population is not visible here; confirm.
private final Map<String, Element> whitelist = new HashMap<>();
......@@ -128,8 +131,8 @@ public class HtmlCleanerPlugin extends Plugin implements IHtmlCleanerService {
attributesToRemove.add(attributeName);
continue;
}
final String value = escaper.escapeText(attributeValue.toLowerCase().trim());
if (value.startsWith(JAVASCRIPT_PROTOCOL)) {
final String value = cleanCRLFTAB(escaper.escapeText(attributeValue.toLowerCase().trim()));
if (value.startsWith(JAVASCRIPT_PROTOCOL) || checkDataAttrValue(node.getName(), attributeName, value)) {
attributes.put(attributeName, "");
}
}
......@@ -145,6 +148,16 @@ public class HtmlCleanerPlugin extends Plugin implements IHtmlCleanerService {
return node;
}
/**
 * Removes every carriage return, line feed and tab character from the given text.
 *
 * @param value the text to strip; must not be {@code null}
 * @return {@code value} with all characters matched by {@code CRLFTAB} deleted
 */
private static String cleanCRLFTAB(final String value) {
    // Deleting these characters lets the subsequent prefix checks see the value without embedded breaks.
    final String stripped = CRLFTAB.matcher(value).replaceAll("");
    return stripped;
}
/**
 * Tells whether an attribute value using the {@code data:} prefix sits on one of the
 * tag/attribute combinations this check singles out: {@code <a href>} or {@code <object data>}.
 *
 * @param tagName   name of the element carrying the attribute
 * @param attrName  name of the attribute
 * @param attrValue attribute value to inspect; must not be {@code null}
 * @return {@code true} only when {@code attrValue} starts with {@code DATA_PROTOCOL} and the
 *         tag/attribute pair is {@code a}/{@code href} or {@code object}/{@code data}
 */
private boolean checkDataAttrValue(final String tagName, final String attrName, final String attrValue) {
    // Values without the data: prefix are never flagged by this check.
    if (!attrValue.startsWith(DATA_PROTOCOL)) {
        return false;
    }
    final boolean anchorHref = "a".equals(tagName) && "href".equals(attrName);
    if (anchorHref) {
        return true;
    }
    return "object".equals(tagName) && "data".equals(attrName);
}
private String serialize(final TagNode html, final CleanerProperties properties) throws IOException {
if (html == null) {
return "";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment