Commit 7037a2ef authored by Ate Douma

CMS-11148 Update to hippo htmldiff library 2.0.0 which no longer uses nekohtml...

CMS-11148 Update to hippo htmldiff library 2.0.0 which no longer uses nekohtml but htmlcleaner (backported from CMS-11122)

Dropping nekohtml also required replacing its direct usage in HtmlValidator and HtmlDiffModelTest with htmlcleaner.
Also replaced the deprecated (Xerces-provided) XMLSerializer usage in the Console XML Content Export with a standard JAXP TransformerHandler.

Finally, removed the xerces dependencies, which are no longer needed.

(cherry picked from commit 411d2bf1)
parent 5076c28b
......@@ -29,6 +29,11 @@
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>${sf.htmlcleaner.version}</version>
</dependency>
<dependency>
<groupId>org.hippoecm</groupId>
<artifactId>htmldiff</artifactId>
......
......@@ -70,12 +70,6 @@
<artifactId>xmlunit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>${xerces.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
......
/*
* Copyright 2008-2014 Hippo B.V. (http://www.onehippo.com)
* Copyright 2008-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -15,16 +15,15 @@
*/
package org.hippoecm.frontend.plugins.console.menu.content;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringWriter;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.xml.transform.Source;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.wicket.ajax.AjaxRequestTarget;
import org.apache.wicket.ajax.markup.html.AjaxLink;
......@@ -36,15 +35,12 @@ import org.apache.wicket.model.PropertyModel;
import org.apache.wicket.model.StringResourceModel;
import org.apache.wicket.util.value.IValueMap;
import org.apache.wicket.util.value.ValueMap;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.hippoecm.frontend.dialog.AbstractDialog;
import org.hippoecm.frontend.model.IModelReference;
import org.hippoecm.frontend.model.JcrNodeModel;
import org.hippoecm.repository.api.HippoSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
public class ContentExportDialog extends AbstractDialog<Node> {
......@@ -94,10 +90,18 @@ public class ContentExportDialog extends AbstractDialog<Node> {
public void onClick(AjaxRequestTarget target) {
String export;
try {
SAXTransformerFactory stf = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
TransformerHandler handler = stf.newTransformerHandler();
StringWriter exportWriter = new StringWriter();
Transformer transformer = handler.getTransformer();
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(2));
handler.setResult(new StreamResult(exportWriter));
Node node = nodeModel.getObject();
ByteArrayOutputStream out = new ByteArrayOutputStream();
((HippoSession) node.getSession()).exportDereferencedView(node.getPath(), out, skipBinary, false);
export = prettyPrint(out.toByteArray());
((HippoSession) node.getSession()).exportDereferencedView(node.getPath(), handler, skipBinary, false);
export = exportWriter.getBuffer().toString();
JcrNodeModel newNodeModel = new JcrNodeModel(node);
modelReference.setModel(newNodeModel);
} catch (Exception e) {
......@@ -135,26 +139,4 @@ public class ContentExportDialog extends AbstractDialog<Node> {
public IValueMap getProperties() {
return SIZE;
}
// privates
/**
 * Re-serializes an XML document as indented text.
 * <p>
 * The bytes are first read into a DOM tree by means of an identity transformation; that tree
 * is then written back out as UTF-8 with indenting enabled, an indent of 2 and a line width of 80.
 *
 * @param bytes the XML document to format
 * @return the formatted document, decoded as UTF-8
 * @throws Exception if the input cannot be transformed or the document cannot be serialized
 */
private String prettyPrint(byte[] bytes) throws Exception {
    // Step 1: identity transform from the raw bytes into a DOM result.
    final DOMResult parsed = new DOMResult();
    TransformerFactory.newInstance()
            .newTransformer()
            .transform(new StreamSource(new ByteArrayInputStream(bytes)), parsed);
    final Document document = (Document) parsed.getNode();

    // Step 2: describe the desired layout.
    // NOTE(review): the setter order is kept exactly as before (indenting first, then indent
    // size and line width) in case setIndenting adjusts those values itself - confirm.
    final OutputFormat layout = new OutputFormat(document);
    layout.setEncoding("UTF-8");
    layout.setIndenting(true);
    layout.setIndent(2);
    layout.setLineWidth(80);

    // Step 3: serialize into an in-memory buffer and decode it with the same encoding.
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    new XMLSerializer(buffer, layout).serialize(document);
    return buffer.toString("UTF-8");
}
}
......@@ -30,12 +30,6 @@
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>net.sourceforge.nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>${nekohtml.version}</version>
</dependency>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-repository-api</artifactId>
......
/*
* Copyright 2009-2016 Hippo B.V. (http://www.onehippo.com)
*
* Copyright 2009-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -15,21 +15,19 @@
*/
package org.hippoecm.frontend.editor.validator;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import com.google.common.base.CharMatcher;
import org.apache.wicket.util.io.IClusterable;
import org.cyberneko.html.parsers.SAXParser;
import org.hippoecm.frontend.validation.ValidationException;
import org.hippoecm.frontend.validation.ValidatorMessages;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.htmlcleaner.BaseToken;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.ContentNode;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
public class HtmlValidator implements IClusterable {
......@@ -37,60 +35,36 @@ public class HtmlValidator implements IClusterable {
public static final String[] VALID_ELEMENTS = new String[]{"img", "object", "embed", "form", "applet", "iframe"};
static class Handler extends DefaultHandler {
boolean valid = false;
@Override
public void characters(char[] chars, int start, int length) throws SAXException {
String value = new String(chars, start, length).intern();
if (CharMatcher.INVISIBLE.negate().matchesAnyOf(value)) {
valid = true;
}
}
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
throws SAXException {
for (String element : VALID_ELEMENTS) {
if (element.equalsIgnoreCase(localName)) {
valid = true;
break;
}
}
}
boolean isValid() {
return valid;
/**
 * Validates that the given HTML is not empty.
 * <p>
 * The markup is cleaned with an {@link HtmlCleaner} whose properties are set as follows:
 * omitXmlDeclaration, omitHtmlEnvelope and omitComments on, namespacesAware off,
 * deserializeEntities on. The resulting tree is then handed to {@code isEmpty(TagNode)}.
 *
 * @param html the HTML markup to validate; {@code null} is treated as empty
 * @return a set of violation keys: it contains {@link ValidatorMessages#HTML_IS_EMPTY} when the
 *         markup is {@code null} or the cleaned tree is considered empty, and is empty otherwise
 */
public Set<String> validateNonEmpty(String html) {
    final Set<String> result = new HashSet<>();

    // A missing value has no content. Report it as empty instead of passing null on to
    // HtmlCleaner.clean(String), which is not expected to cope with a null argument.
    if (html == null) {
        result.add(ValidatorMessages.HTML_IS_EMPTY);
        return result;
    }

    final HtmlCleaner cleaner = new HtmlCleaner();
    final CleanerProperties properties = cleaner.getProperties();
    properties.setOmitXmlDeclaration(true);
    properties.setOmitHtmlEnvelope(true);
    properties.setOmitComments(true);
    properties.setNamespacesAware(false);
    properties.setDeserializeEntities(true);

    if (isEmpty(cleaner.clean(html))) {
        result.add(ValidatorMessages.HTML_IS_EMPTY);
    }
    return result;
}
public Set<String> validateNonEmpty(String html) throws ValidationException {
Set<String> result = new HashSet<>();
Handler handler = new Handler();
try {
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(html));
SAXParser parser = new SAXParser();
parser.setFeature("http://xml.org/sax/features/namespaces", true);
parser.setFeature("http://cyberneko.org/html/features/override-namespaces", false);
parser.setFeature("http://cyberneko.org/html/features/insert-namespaces", false);
parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
parser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8");
parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
parser.setContentHandler(handler);
parser.parse(is);
if (!handler.isValid()) {
result.add(ValidatorMessages.HTML_IS_EMPTY);
private boolean isEmpty(final TagNode node) {
for (BaseToken item : node.getAllChildren()) {
if (item instanceof TagNode) {
final TagNode childNode = (TagNode)item;
if (Arrays.stream(VALID_ELEMENTS).anyMatch(e -> childNode.getName().equalsIgnoreCase(e))
|| CharMatcher.INVISIBLE.negate().matchesAnyOf(childNode.getText())
|| !isEmpty(childNode)) {
return false;
}
} else if (item instanceof ContentNode) {
if (CharMatcher.INVISIBLE.negate().matchesAnyOf(((ContentNode) item).getContent())) {
return false;
}
}
} catch (SAXException e) {
result.add(ValidatorMessages.INVALID_XML);
} catch (IOException e) {
throw new ValidationException("Input/output error", e);
}
return result;
return true;
}
}
......@@ -111,10 +111,8 @@
<jetty.version>6.1.22</jetty.version>
<xerces.version>2.8.1</xerces.version>
<wicket.version>6.19.0</wicket.version>
<wicket-extjs.version>0.25.3</wicket-extjs.version>
<nekohtml.version>1.9.13</nekohtml.version>
<httpclient.version>4.5.5</httpclient.version>
<joda-time.version>2.0</joda-time.version>
......@@ -126,7 +124,7 @@
<imgscalr-lib.version>4.2</imgscalr-lib.version>
<yui.sources.version>1.01.12</yui.sources.version>
<hippo.htmldiff.version>1.01.04</hippo.htmldiff.version>
<hippo.htmldiff.version>2.0.0-SNAPSHOT</hippo.htmldiff.version>
<hippo.jcrdiff.version>1.01.06</hippo.jcrdiff.version>
<hippo.ckeditor.version>4.5.11-h1</hippo.ckeditor.version>
......@@ -325,16 +323,6 @@
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>${xerces.version}</version>
</dependency>
<dependency>
<groupId>net.sourceforge.nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>${nekohtml.version}</version>
</dependency>
<dependency>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-rs-client</artifactId>
......
/*
* Copyright 2010-2013 Hippo B.V. (http://www.onehippo.com)
* Copyright 2010-2018 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -17,22 +17,14 @@ package org.hippoecm.frontend.plugins.standards.diff;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.io.StringReader;
import org.apache.wicket.model.IModel;
import org.apache.wicket.model.Model;
import org.hippoecm.frontend.HippoTester;
import org.hippoecm.frontend.PluginPage;
import org.htmlcleaner.HtmlCleaner;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.outerj.daisy.diff.helper.NekoHtmlParser;
import org.outerj.daisy.diff.helper.SaxBuffer;
import org.outerj.daisy.diff.helper.SaxBuffer.SaxBit;
import org.outerj.daisy.diff.helper.SaxBuffer.StartElement;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class HtmlDiffModelTest {
......@@ -50,35 +42,23 @@ public class HtmlDiffModelTest {
tester = null;
}
static int countImages(String content) throws IOException, SAXException {
int count = 0;
NekoHtmlParser parser = new NekoHtmlParser();
InputSource source = new InputSource();
source.setCharacterStream(new StringReader(content));
SaxBuffer buffer = parser.parse(source);
for (SaxBit bit : buffer.getBits()) {
if (bit instanceof StartElement) {
if ("img".equals(((StartElement) bit).localName)) {
count++;
}
}
}
return count;
/**
 * Counts the {@code img} elements found in the given HTML.
 *
 * @param content the HTML markup to inspect
 * @return the number of elements named {@code img} reported by the cleaned document tree
 */
static int countImages(String content) {
    final HtmlCleaner cleaner = new HtmlCleaner();
    // The boolean argument is passed as true; presumably it requests a recursive lookup
    // through the whole tree - confirm against the TagNode API.
    return cleaner.clean(content).getElementsByName("img", true).length;
}
@Test
public void removedImageIsShown() throws Exception {
IModel<String> oldModel = new Model<String>("<html><body><img src=\"a\">abc</img></body></html>");
IModel<String> newModel = new Model<String>("<html><body></body></html>");
/**
 * Diffs a document containing one image against a document without it and expects the
 * diff output to contain exactly one {@code img} element.
 */
public void removedImageIsShown() {
    final IModel<String> before = new Model<>("<html><body><img src=\"a\">abc</img></body></html>");
    final IModel<String> after = new Model<>("<html><body></body></html>");

    final String diff = new HtmlDiffModel(before, after).getObject();

    assertEquals(1, countImages(diff));
}
@Test
public void addedImageIsShownOnce() throws Exception {
IModel<String> oldModel = new Model<String>("<html><body></body></html>");
IModel<String> newModel = new Model<String>("<html><body><img src=\"a\" /></body></html>");
public void addedImageIsShownOnce() {
IModel<String> oldModel = new Model<>("<html><body></body></html>");
IModel<String> newModel = new Model<>("<html><body><img src=\"a\" /></body></html>");
HtmlDiffModel dm = new HtmlDiffModel(oldModel, newModel);
String content = dm.getObject();
......@@ -86,9 +66,9 @@ public class HtmlDiffModelTest {
}
@Test
public void constantImageIsShownOnce() throws Exception {
IModel<String> oldModel = new Model<String>("<html><body><img src=\"a\" /></body></html>");
IModel<String> newModel = new Model<String>("<html><body><img src=\"a\" /></body></html>");
public void constantImageIsShownOnce() {
IModel<String> oldModel = new Model<>("<html><body><img src=\"a\" /></body></html>");
IModel<String> newModel = new Model<>("<html><body><img src=\"a\" /></body></html>");
HtmlDiffModel dm = new HtmlDiffModel(oldModel, newModel);
String content = dm.getObject();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment