code.onehippo.org is currently read-only. We are migrating to code.bloomreach.com; please continue working there from Monday 14/12. See: https://docs.bloomreach.com/display/engineering/GitLab

Commit ca54d146, authored by Bert Leunis, committed by Mathijs den Burger

CMS-11213 HTML serializers don't normalize quotes in attributes

A backport of HHP-26, plus a text change in a pom file.

(cherry picked from commit 9600a438)
parent b55841c0
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright 2007-2016 Hippo B.V. (http://www.onehippo.com)
Copyright 2007-2018 Hippo B.V. (http://www.onehippo.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -23,8 +23,8 @@
<version>4.2.13-SNAPSHOT</version>
</parent>
<name>CMS Plugins</name>
<description>Hippo CMS plugins - console and login / logout</description>
<name>Console Frontend Plugins</name>
<description>Console Frontend Plugins</description>
<artifactId>hippo-cms-console-frontend</artifactId>
<packaging>jar</packaging>
......@@ -78,7 +78,6 @@
<artifactId>wicket-core</artifactId>
</dependency>
<!-- test -->
<dependency>
<groupId>junit</groupId>
......
/*
* Copyright 2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2016-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -24,11 +24,16 @@ import org.htmlcleaner.SpecialEntity;
class CharacterReferenceNormalizer {
private enum ConvertQuote {YES, NO}
private static class ResultWriter {
private final StringBuilder result;
ResultWriter(final int capacity) {
private final ConvertQuote convertQuote;
private ResultWriter(final int capacity, final ConvertQuote convertQuote) {
result = new StringBuilder(capacity);
this.convertQuote = convertQuote;
}
@Override
......@@ -38,6 +43,9 @@ class CharacterReferenceNormalizer {
ResultWriter write(final char ch) {
switch (ch) {
case '"':
result.append(this.convertQuote == ConvertQuote.NO ? "&quot;" : ch);
break;
case '&':
result.append("&amp;");
break;
......@@ -120,13 +128,33 @@ class CharacterReferenceNormalizer {
}
/**
 * Legacy entry point; forwards to {@link #normalizeElementContent(String)} and returns its result unchanged.
 *
 * @param string the string to normalize
 * @return the string as normalized for element content
 * @deprecated use {@link #normalizeElementContent(String)} instead.
 */
@Deprecated
public static String normalize(final String string) {
    final String normalized = normalizeElementContent(string);
    return normalized;
}
/**
 * Normalizes character references for element content by running the shared
 * normalization with {@code ConvertQuote.YES}.
 *
 * @param string the string to normalize
 * @return the normalized string
 */
public static String normalizeElementContent(final String string) {
    final ConvertQuote quoteMode = ConvertQuote.YES;
    return normalize(string, quoteMode);
}
/**
 * Normalizes character references for attribute content by running the shared
 * normalization with {@code ConvertQuote.NO}.
 *
 * @param string the string to normalize
 * @return the normalized string
 */
public static String normalizeAttributeContent(final String string) {
    final ConvertQuote quoteMode = ConvertQuote.NO;
    return normalize(string, quoteMode);
}
/**
* Transforms character references (e.g. &amp;aacute;, &amp;#225;, etc.) to characters by applying the same rules as
* CKEditor in Hippo's default configuration. These rules are: convert all character references to the character
* they represent except for &amp;nbsp;, &amp;gt;, &amp;lt;, &amp;amp; - those must always be encoded.
*
* @param string the string to normalize
* @param convertQuote whether a double quote is written as a literal " character ({@code YES}) or as &amp;quot; ({@code NO}).
* @return the normalized string
*/
static String normalize(final String string) {
final ResultWriter resultWriter = new ResultWriter(string.length());
private static String normalize(final String string, final ConvertQuote convertQuote) {
final ResultWriter resultWriter = new ResultWriter(string.length(), convertQuote);
final CharacterReferenceFinder finder = new CharacterReferenceFinder(string);
int current = 0;
......
/*
* Copyright 2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2016-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -16,18 +16,38 @@
package org.hippoecm.frontend.plugins.richtext.htmlcleaner;
import java.io.IOException;
import java.io.Writer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CompactHtmlSerializer;
import org.htmlcleaner.TagNode;
/**
 * {@link CompactHtmlSerializer} whose text escaping is delegated to {@link CharacterReferenceNormalizer}.
 * Text escaped while an open tag is being serialized is normalized as attribute content; all other
 * text is normalized as element content.
 */
public class HippoCompactHtmlSerializer extends CompactHtmlSerializer {

    /** Per-thread flag: {@code true} by default, {@code false} only for the duration of {@link #serializeOpenTag}. */
    private final ThreadLocal<Boolean> isElementContent;

    /**
     * @param properties the cleaner properties handed to the parent serializer
     */
    public HippoCompactHtmlSerializer(final CleanerProperties properties) {
        super(properties);
        isElementContent = ThreadLocal.withInitial(() -> true);
    }

    /**
     * Serializes the open tag via the parent implementation with the element-content flag switched off,
     * so that {@link #escapeText(String)} calls made during that time use the attribute rules.
     */
    @Override
    protected void serializeOpenTag(final TagNode tagNode, final Writer writer, final boolean newLine) throws IOException {
        isElementContent.set(false);
        try {
            super.serializeOpenTag(tagNode, writer, newLine);
        } finally {
            // Always restore the default, even when the parent throws.
            isElementContent.set(true);
        }
    }

    /**
     * Escapes the given text using the element-content or attribute-content rules, depending on
     * whether an open tag is currently being serialized on this thread.
     *
     * @param content the text to escape
     * @return the normalized text
     */
    @Override
    protected String escapeText(final String content) {
        if (isElementContent.get()) {
            return CharacterReferenceNormalizer.normalizeElementContent(content);
        }
        return CharacterReferenceNormalizer.normalizeAttributeContent(content);
    }
}
/*
* Copyright 2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2016-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -16,18 +16,38 @@
package org.hippoecm.frontend.plugins.richtext.htmlcleaner;
import java.io.IOException;
import java.io.Writer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.PrettyHtmlSerializer;
import org.htmlcleaner.TagNode;
/**
 * {@link PrettyHtmlSerializer} whose text escaping is delegated to {@link CharacterReferenceNormalizer}.
 * Text escaped while an open tag is being serialized is normalized as attribute content; all other
 * text is normalized as element content.
 */
public class HippoPrettyHtmlSerializer extends PrettyHtmlSerializer {

    /** Per-thread flag: {@code true} by default, {@code false} only for the duration of {@link #serializeOpenTag}. */
    private final ThreadLocal<Boolean> isElementContent;

    /**
     * @param properties the cleaner properties handed to the parent serializer
     */
    public HippoPrettyHtmlSerializer(final CleanerProperties properties) {
        super(properties);
        isElementContent = ThreadLocal.withInitial(() -> true);
    }

    /**
     * Serializes the open tag via the parent implementation with the element-content flag switched off,
     * so that {@link #escapeText(String)} calls made during that time use the attribute rules.
     */
    @Override
    protected void serializeOpenTag(final TagNode tagNode, final Writer writer, final boolean newLine) throws IOException {
        isElementContent.set(false);
        try {
            super.serializeOpenTag(tagNode, writer, newLine);
        } finally {
            // Always restore the default, even when the parent throws.
            isElementContent.set(true);
        }
    }

    /**
     * Escapes the given text using the element-content or attribute-content rules, depending on
     * whether an open tag is currently being serialized on this thread.
     *
     * @param content the text to escape
     * @return the normalized text
     */
    @Override
    protected String escapeText(final String content) {
        if (isElementContent.get()) {
            return CharacterReferenceNormalizer.normalizeElementContent(content);
        }
        return CharacterReferenceNormalizer.normalizeAttributeContent(content);
    }
}
/*
* Copyright 2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2016-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -16,18 +16,37 @@
package org.hippoecm.frontend.plugins.richtext.htmlcleaner;
import java.io.IOException;
import java.io.Writer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlcleaner.TagNode;
public class HippoSimpleHtmlSerializer extends SimpleHtmlSerializer {
private final ThreadLocal<Boolean> isElementContent;
public HippoSimpleHtmlSerializer(final CleanerProperties properties) {
super(properties);
isElementContent = ThreadLocal.withInitial(() -> true);
}
@Override
protected String escapeText(final String content) {
return CharacterReferenceNormalizer.normalize(content);
protected void serializeOpenTag(final TagNode tagNode, final Writer writer, final boolean newLine) throws IOException {
isElementContent.set(false);
try {
super.serializeOpenTag(tagNode, writer, newLine);
} finally {
isElementContent.set(true);
}
}
@Override
protected String escapeText(final String content) {
if (isElementContent.get()) {
return CharacterReferenceNormalizer.normalizeElementContent(content);
}
return CharacterReferenceNormalizer.normalizeAttributeContent(content);
}
}
/*
* Copyright 2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2016-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -24,48 +24,53 @@ public class CharacterReferenceNormalizerTest {
/** Raw {@code >}, {@code <}, {@code &} and the character with code 160 must be written as named entities. */
@Test
public void test_base_entities_are_encoded_as_entities() {
    assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalizeElementContent("> < & " + (char) 160));
}
/** The four base entities must pass through normalization unchanged when they are already encoded. */
@Test
public void test_base_entities_remain_encoded_as_entities() {
    assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalizeElementContent("&gt; &lt; &amp; &nbsp;"));
}
/**
 * Decimal and hexadecimal numeric references become the character they denote, except when that
 * character is one of the base entities, in which case the named entity is written.
 */
@Test
public void test_numeric_entities_are_converted_to_characters() {
    assertEquals("á á á á", CharacterReferenceNormalizer.normalizeElementContent("&#225; &#xe1; &#X00E1; &#0x0e1;"));
    assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalizeElementContent("&#62; &#60; &#38; &#160;"));
}
/** A named character reference is replaced by the character it denotes. */
@Test
public void test_named_entities_are_converted_to_characters() {
    assertEquals("á", CharacterReferenceNormalizer.normalizeElementContent("&aacute;"));
}
/** A named character reference whose name contains digits is converted as well. */
@Test
public void test_named_entities_with_numbers_are_converted() {
    assertEquals("¾", CharacterReferenceNormalizer.normalizeElementContent("&frac34;"));
}
/** Malformed or unterminated numeric references are kept as text, with their ampersand encoded. */
@Test
public void test_incorrect_numeric_entity_conversion() {
    assertEquals("&amp;#12ab; &amp;#x12abz;", CharacterReferenceNormalizer.normalizeElementContent("&#12ab; &#x12abz;"));
    assertEquals("&amp;#", CharacterReferenceNormalizer.normalizeElementContent("&#"));
    assertEquals("&amp;#12", CharacterReferenceNormalizer.normalizeElementContent("&#12"));
}
/** Unknown or unterminated named references are kept as text, with their ampersand encoded. */
@Test
public void test_incorrect_named_entity_conversion() {
    assertEquals("&amp;nonsense;", CharacterReferenceNormalizer.normalizeElementContent("&nonsense;"));
    assertEquals("&amp;", CharacterReferenceNormalizer.normalizeElementContent("&"));
    assertEquals("&amp;nonsense", CharacterReferenceNormalizer.normalizeElementContent("&nonsense"));
}
/** An invalid reference immediately followed by a valid one: only the valid reference is converted. */
@Test
public void test_incorrect_entity_directly_followed_by_correct_entity_conversion() {
    assertEquals("&amp;nonsenseá", CharacterReferenceNormalizer.normalizeElementContent("&nonsense&#225;"));
    assertEquals("&amp;#225á", CharacterReferenceNormalizer.normalizeElementContent("&#225&#225;"));
}
/**
 * In attribute content, double quotes (raw or already encoded) must end up as {@code &quot;},
 * while single quotes (raw or encoded as {@code &apos;}) end up as a literal apostrophe.
 */
@Test
public void testQuoteConversionInAttributeContent() {
    assertEquals("' &quot; ' &quot;",
            CharacterReferenceNormalizer.normalizeAttributeContent("' \" &apos; &quot;"));
}
}
/*
* Copyright 2014-2016 Hippo B.V. (http://www.onehippo.com)
* Copyright 2014-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -140,6 +140,17 @@ public class HtmlCleanerPluginTest extends PluginTest {
assertEquals("' \" ' \"", html);
}
/**
 * Cleaning markup whose attribute value contains an encoded double quote must return the markup
 * unchanged, i.e. the {@code &quot;} inside the attribute is not turned into a literal quote.
 */
@Test
public void testQuoteConversionNotWithinAttributes() throws Exception {
    final String input = "<table summary=\"&quot; onmouseover=alert('hi')\"></table>";

    final IPluginConfig config = getPluginConfig();
    final HtmlCleanerPlugin plugin = new HtmlCleanerPlugin(null, config);
    final String output = plugin.clean(input, false, null, null);

    assertEquals(input, output);
}
@Test
public void expectScriptTagIsNotRemoved() throws Exception {
final IPluginConfig pluginConfig = getPluginConfig();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment