Commit b77d2956 authored by Mathijs den Burger

HHP-26 Reintegrate bugfix/HHP-26

parents b214f232 b13eb279
......@@ -85,7 +85,7 @@ public class WhitelistHtmlFilter implements HtmlFilter {
.collect(Collectors.toMap(attribute -> attribute.getKey(), attribute -> {
final String value = attribute.getValue();
final String normalizedValue =
cleanCRLFTAB(CharacterReferenceNormalizer.normalize(value.toLowerCase().trim()));
cleanCRLFTAB(CharacterReferenceNormalizer.normalizeAttributeContent(value.toLowerCase().trim()));
if (omitJavascriptProtocol &&
(normalizedValue.startsWith(JAVASCRIPT_PROTOCOL) ||
checkDataAttrValue(node.getName(), attribute.getKey(), normalizedValue))) {
......
/*
* Copyright 2016-2017 Hippo B.V. (http://www.onehippo.com)
* Copyright 2016-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -24,11 +24,15 @@ import org.htmlcleaner.SpecialEntity;
public class CharacterReferenceNormalizer {
// Selects how a double quote is written to the result: NO writes it as a character reference,
// YES writes the quote character itself.
private enum ConvertQuote { YES, NO };
private static class ResultWriter {
private final StringBuilder result;
private final ConvertQuote convertQuote;
ResultWriter(final int capacity) {
private ResultWriter(final int capacity, final ConvertQuote convertQuote) {
result = new StringBuilder(capacity);
this.convertQuote = convertQuote;
}
@Override
......@@ -38,6 +42,9 @@ public class CharacterReferenceNormalizer {
ResultWriter write(final char ch) {
switch (ch) {
case '"':
result.append(this.convertQuote == ConvertQuote.NO ? "&quot;" : ch);
break;
case '&':
result.append("&amp;");
break;
......@@ -71,11 +78,11 @@ public class CharacterReferenceNormalizer {
private static class CharacterReferenceFinder {
private final static Pattern pattern = Pattern.compile(
"&(?<entity>\\p{Alnum}+);|&#(?<dec>\\p{Digit}+);|&#0*(x|X)(?<hex>\\p{XDigit}+);");
/* Regexp with searching for 3 patterns:
* 1) character entity reference, e.g. &aacute;
* 2) decimal numeric character reference, e.g. &#225;
* 3) hexadecimal numeric character reference, e.g. &#0xE1;
*/
/* Regexp with searching for 3 patterns:
* 1) character entity reference, e.g. &aacute;
* 2) decimal numeric character reference, e.g. &#225;
* 3) hexadecimal numeric character reference, e.g. &#0xE1;
*/
private static final SpecialEntities specialEntities = SpecialEntities.INSTANCE;
......@@ -120,13 +127,33 @@ public class CharacterReferenceNormalizer {
}
/**
 * Normalizes the character references in the given string. This method only delegates to
 * {@link #normalizeElementContent(String)} and is kept for callers of the old name.
 *
 * @param string the string to normalize
 * @return the result of {@link #normalizeElementContent(String)}
 * @deprecated use {@link #normalizeElementContent(String)} instead.
 */
@Deprecated
public static String normalize(final String string) {
return normalizeElementContent(string);
}
/**
 * Normalizes the character references in a string that is used as element content.
 * Delegates to the private normalizer with {@code ConvertQuote.YES}, so a double quote
 * is written as the quote character itself.
 *
 * @param string the string to normalize
 * @return the normalized string
 */
public static String normalizeElementContent(final String string) {
return normalize(string, ConvertQuote.YES);
}
/**
 * Normalizes the character references in a string that is used as attribute content.
 * Delegates to the private normalizer with {@code ConvertQuote.NO}, so a double quote
 * is written as the &amp;quot; character reference instead of the quote character.
 *
 * @param string the string to normalize
 * @return the normalized string
 */
public static String normalizeAttributeContent(final String string) {
return normalize(string, ConvertQuote.NO);
}
/**
* Transforms character references (e.g. &amp;aacute;, &amp;#225;, etc.) to characters by applying the same rules as
* CKEditor in Hippo's default configuration. These rules are: convert all character references to the character
* they represent except for &amp;nbsp;, &amp;gt;, &amp;lt;, &amp;amp; - those must always be encoded.
*
* @param string the string to normalize
* @param convertQuote whether to convert &quot; to " or not.
* @return the normalized string
*/
public static String normalize(final String string) {
final ResultWriter resultWriter = new ResultWriter(string.length());
private static String normalize(final String string, final ConvertQuote convertQuote) {
final ResultWriter resultWriter = new ResultWriter(string.length(), convertQuote);
final CharacterReferenceFinder finder = new CharacterReferenceFinder(string);
int current = 0;
......
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
* Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -16,10 +16,7 @@
package org.onehippo.cms7.services.htmlprocessor.serialize;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CompactHtmlSerializer;
import org.htmlcleaner.PrettyHtmlSerializer;
import org.htmlcleaner.Serializer;
import org.htmlcleaner.SimpleHtmlSerializer;
public class HtmlSerializerFactory {
......@@ -30,26 +27,11 @@ public class HtmlSerializerFactory {
public static Serializer create(final HtmlSerializer serializer, final CleanerProperties properties) {
switch (serializer) {
case PRETTY:
return new PrettyHtmlSerializer(properties) {
@Override
protected String escapeText(final String content) {
return CharacterReferenceNormalizer.normalize(content);
}
};
return new NormalizingPrettyHtmlSerializer(properties);
case COMPACT:
return new CompactHtmlSerializer(properties) {
@Override
protected String escapeText(final String content) {
return CharacterReferenceNormalizer.normalize(content);
}
};
return new NormalizingCompactHtmlSerializer(properties);
default:
return new SimpleHtmlSerializer(properties) {
@Override
protected String escapeText(final String content) {
return CharacterReferenceNormalizer.normalize(content);
}
};
return new NormalizingSimpleHtmlSerializer(properties);
}
}
......
/*
* Copyright 2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor.serialize;
import java.io.IOException;
import java.io.Writer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CompactHtmlSerializer;
import org.htmlcleaner.TagNode;
/**
 * {@link CompactHtmlSerializer} that escapes text through {@link CharacterReferenceNormalizer}.
 * While an open tag is being serialized the attribute-content rules are applied; at any other
 * time the element-content rules are applied.
 */
class NormalizingCompactHtmlSerializer extends CompactHtmlSerializer {

    // Per-thread marker: false only for the duration of serializeOpenTag(), true otherwise.
    private final ThreadLocal<Boolean> elementContent = ThreadLocal.withInitial(() -> Boolean.TRUE);

    NormalizingCompactHtmlSerializer(final CleanerProperties props) {
        super(props);
    }

    /**
     * Serializes the open tag with the element-content marker switched off. The marker is
     * switched back on afterwards, also when the superclass throws.
     * NOTE(review): presumably the superclass escapes attribute values via escapeText() here - confirm.
     */
    @Override
    protected void serializeOpenTag(final TagNode tagNode, final Writer writer, final boolean newLine) throws IOException {
        elementContent.set(Boolean.FALSE);
        try {
            super.serializeOpenTag(tagNode, writer, newLine);
        } finally {
            elementContent.set(Boolean.TRUE);
        }
    }

    /**
     * Normalizes the given text with the rule set that matches the current marker state.
     */
    @Override
    protected String escapeText(final String content) {
        final boolean insideOpenTag = !elementContent.get();
        return insideOpenTag
                ? CharacterReferenceNormalizer.normalizeAttributeContent(content)
                : CharacterReferenceNormalizer.normalizeElementContent(content);
    }
}
/*
* Copyright 2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor.serialize;
import java.io.IOException;
import java.io.Writer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.PrettyHtmlSerializer;
import org.htmlcleaner.TagNode;
/**
 * {@link PrettyHtmlSerializer} that escapes text through {@link CharacterReferenceNormalizer}.
 * While an open tag is being serialized the attribute-content rules are applied; at any other
 * time the element-content rules are applied.
 */
class NormalizingPrettyHtmlSerializer extends PrettyHtmlSerializer {

    // Per-thread marker: false only for the duration of serializeOpenTag(), true otherwise.
    private final ThreadLocal<Boolean> elementContent = ThreadLocal.withInitial(() -> Boolean.TRUE);

    NormalizingPrettyHtmlSerializer(final CleanerProperties props) {
        super(props);
    }

    /**
     * Serializes the open tag with the element-content marker switched off. The marker is
     * switched back on afterwards, also when the superclass throws.
     * NOTE(review): presumably the superclass escapes attribute values via escapeText() here - confirm.
     */
    @Override
    protected void serializeOpenTag(final TagNode tagNode, final Writer writer, final boolean newLine) throws IOException {
        elementContent.set(Boolean.FALSE);
        try {
            super.serializeOpenTag(tagNode, writer, newLine);
        } finally {
            elementContent.set(Boolean.TRUE);
        }
    }

    /**
     * Normalizes the given text with the rule set that matches the current marker state.
     */
    @Override
    protected String escapeText(final String content) {
        final boolean insideOpenTag = !elementContent.get();
        return insideOpenTag
                ? CharacterReferenceNormalizer.normalizeAttributeContent(content)
                : CharacterReferenceNormalizer.normalizeElementContent(content);
    }
}
/*
* Copyright 2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor.serialize;
import java.io.IOException;
import java.io.Writer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlcleaner.TagNode;
/**
 * {@link SimpleHtmlSerializer} that escapes text through {@link CharacterReferenceNormalizer}.
 * While an open tag is being serialized the attribute-content rules are applied; at any other
 * time the element-content rules are applied.
 */
class NormalizingSimpleHtmlSerializer extends SimpleHtmlSerializer {

    // Per-thread marker: false only for the duration of serializeOpenTag(), true otherwise.
    private final ThreadLocal<Boolean> elementContent = ThreadLocal.withInitial(() -> Boolean.TRUE);

    NormalizingSimpleHtmlSerializer(final CleanerProperties props) {
        super(props);
    }

    /**
     * Serializes the open tag with the element-content marker switched off. The marker is
     * switched back on afterwards, also when the superclass throws.
     * NOTE(review): presumably the superclass escapes attribute values via escapeText() here - confirm.
     */
    @Override
    protected void serializeOpenTag(final TagNode tagNode, final Writer writer, final boolean newLine) throws IOException {
        elementContent.set(Boolean.FALSE);
        try {
            super.serializeOpenTag(tagNode, writer, newLine);
        } finally {
            elementContent.set(Boolean.TRUE);
        }
    }

    /**
     * Normalizes the given text with the rule set that matches the current marker state.
     */
    @Override
    protected String escapeText(final String content) {
        final boolean insideOpenTag = !elementContent.get();
        return insideOpenTag
                ? CharacterReferenceNormalizer.normalizeAttributeContent(content)
                : CharacterReferenceNormalizer.normalizeElementContent(content);
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
* Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -28,6 +28,7 @@ import org.hamcrest.CoreMatchers;
import org.junit.Before;
import org.junit.Test;
import org.onehippo.cms7.services.htmlprocessor.filter.Element;
import org.onehippo.cms7.services.htmlprocessor.serialize.HtmlSerializer;
import org.onehippo.repository.mock.MockNode;
import static org.junit.Assert.assertEquals;
......@@ -156,6 +157,25 @@ public class HtmlProcessorTest {
}
/**
 * Checks, for every {@link HtmlSerializer} value, that markup whose whitelisted attribute contains
 * a quote character reference is written back exactly as it was given.
 */
@Test
public void characterReferencesInAttributesAreNotNormalized() throws IOException {
    final String input = "<table summary=\"&quot; onmouseover=alert('hi')\"></table>";

    for (final HtmlSerializer serializerType : HtmlSerializer.values()) {
        final HtmlProcessorConfig processorConfig = new HtmlProcessorConfig();
        processorConfig.setFilter(true);
        processorConfig.setSerializer(serializerType);
        // Only <table> with its "summary" attribute is whitelisted.
        processorConfig.setWhitelistElements(Collections.singletonList(Element.create("table", "summary")));
        processor = new HtmlProcessorImpl(processorConfig);

        final String output = processor.write(input, Collections.emptyList());

        // The serializer name is the failure message, so a failing serializer is identifiable.
        assertEquals(serializerType.name(), input, output);
    }
}
@Test
public void testReadVisitor() throws Exception {
final HtmlProcessorConfig htmlProcessorConfig = new HtmlProcessorConfig();
......
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
* Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -22,60 +22,66 @@ import static org.junit.Assert.assertEquals;
public class CharacterReferenceNormalizerTest {
@Test
public void testCharacterEntityConversion() throws Exception {
final String result = CharacterReferenceNormalizer.normalize("&nbsp; &gt; &lt; &amp; á &aacute;");
public void testCharacterEntityConversion() {
final String result = CharacterReferenceNormalizer.normalizeElementContent("&nbsp; &gt; &lt; &amp; á &aacute;");
assertEquals("&nbsp; &gt; &lt; &amp; á á", result);
}
@Test
public void testQuoteConversion() throws Exception {
final String result = CharacterReferenceNormalizer.normalize("' \" &apos; &quot;");
public void testQuoteConversionInTextContent() {
final String result = CharacterReferenceNormalizer.normalizeElementContent("' \" &apos; &quot;");
assertEquals("' \" ' \"", result);
}
@Test
public void testQuoteConversionInAttributeContent() {
final String result = CharacterReferenceNormalizer.normalizeAttributeContent("' \" &apos; &quot;");
assertEquals("' &quot; ' &quot;", result);
}
@Test
public void test_base_entities_are_encoded_as_entities() {
assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalize("> < & " + (char) 160));
assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalizeElementContent("> < & " + (char) 160));
}
@Test
public void test_base_entities_remain_encoded_as_entities() {
assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalize("&gt; &lt; &amp; &nbsp;"));
assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalizeElementContent("&gt; &lt; &amp; &nbsp;"));
}
@Test
public void test_numeric_entities_are_converted_to_characters() {
assertEquals("á á á á", CharacterReferenceNormalizer.normalize("&#225; &#xe1; &#X00E1; &#0x0e1;"));
assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalize("&#62; &#60; &#38; &#160;"));
assertEquals("á á á á", CharacterReferenceNormalizer.normalizeElementContent("&#225; &#xe1; &#X00E1; &#0x0e1;"));
assertEquals("&gt; &lt; &amp; &nbsp;", CharacterReferenceNormalizer.normalizeElementContent("&#62; &#60; &#38; &#160;"));
}
@Test
public void test_named_entities_are_converted_to_characters() {
assertEquals("á", CharacterReferenceNormalizer.normalize("&aacute;"));
assertEquals("á", CharacterReferenceNormalizer.normalizeElementContent("&aacute;"));
}
@Test
public void test_named_entities_with_numbers_are_converted() {
assertEquals("¾", CharacterReferenceNormalizer.normalize("&frac34;"));
assertEquals("¾", CharacterReferenceNormalizer.normalizeElementContent("&frac34;"));
}
@Test
public void test_incorrect_numeric_entity_conversion() {
assertEquals("&amp;#12ab; &amp;#x12abz;", CharacterReferenceNormalizer.normalize("&#12ab; &#x12abz;"));
assertEquals("&amp;#", CharacterReferenceNormalizer.normalize("&#"));
assertEquals("&amp;#12", CharacterReferenceNormalizer.normalize("&#12"));
assertEquals("&amp;#12ab; &amp;#x12abz;", CharacterReferenceNormalizer.normalizeElementContent("&#12ab; &#x12abz;"));
assertEquals("&amp;#", CharacterReferenceNormalizer.normalizeElementContent("&#"));
assertEquals("&amp;#12", CharacterReferenceNormalizer.normalizeElementContent("&#12"));
}
@Test
public void test_incorrect_named_entity_conversion() {
assertEquals("&amp;nonsense;", CharacterReferenceNormalizer.normalize("&nonsense;"));
assertEquals("&amp;", CharacterReferenceNormalizer.normalize("&"));
assertEquals("&amp;nonsense", CharacterReferenceNormalizer.normalize("&nonsense"));
assertEquals("&amp;nonsense;", CharacterReferenceNormalizer.normalizeElementContent("&nonsense;"));
assertEquals("&amp;", CharacterReferenceNormalizer.normalizeElementContent("&"));
assertEquals("&amp;nonsense", CharacterReferenceNormalizer.normalizeElementContent("&nonsense"));
}
@Test
public void test_incorrect_entity_directly_followed_by_correct_entity_conversion() {
assertEquals("&amp;nonsenseá", CharacterReferenceNormalizer.normalize("&nonsense&#225;"));
assertEquals("&amp;#225á", CharacterReferenceNormalizer.normalize("&#225&#225;"));
assertEquals("&amp;nonsenseá", CharacterReferenceNormalizer.normalizeElementContent("&nonsense&#225;"));
assertEquals("&amp;#225á", CharacterReferenceNormalizer.normalizeElementContent("&#225&#225;"));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment