Commit dcf4f8b6 authored by Arthur Bogaart

HHP-1 Initial commit

parents
/*.iml
/.classpath
/.idea
/.project
/.settings
/target
This diff is collapsed.
Hippo CMS7 Services - htmlprocessor
Copyright 2017 Hippo B.V. (http://www.onehippo.com)
This product includes software developed by:
Hippo B.V., Amsterdam, The Netherlands (http://www.onehippo.com/);
The Apache Software Foundation (http://www.apache.org/).
NOTICE: Only our own original work is licensed under the terms of the
Apache License Version 2.0. The licenses of some libraries might impose
different redistribution or general licensing terms than those stated in the
Apache License. Users and redistributors are hereby requested to verify these
conditions and agree upon them.
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright 2017 Hippo B.V. (http://www.onehippo.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS"
BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-cms7-project</artifactId>
<version>29-SNAPSHOT</version>
</parent>
<name>Hippo CMS7 Services - htmlprocessor</name>
<artifactId>hippo-services-htmlprocessor</artifactId>
<version>4.0.0-SNAPSHOT</version>
<inceptionYear>2017</inceptionYear>
<properties>
<!-- Use the root project name in the NOTICE files of all project modules; it must match the name in the root NOTICE file. -->
<notice.project.name>Hippo CMS7 Services - htmlprocessor</notice.project.name>
<!-- runtime dependencies -->
<hippo.repository.version>5.0.0-SNAPSHOT</hippo.repository.version>
<hippo.services.version>4.0.0-SNAPSHOT</hippo.services.version>
<commons-lang.version>2.6</commons-lang.version>
<sf.htmlcleaner.version>2.19</sf.htmlcleaner.version>
<!-- test dependencies -->
<easymock.version>3.0</easymock.version>
<junit.version>4.11</junit.version>
</properties>
<scm>
<connection>scm:git:https://code.onehippo.org/cms-community/hippo-services-htmlprocessor.git</connection>
<developerConnection>scm:git:git@code.onehippo.org:cms-community/hippo-services-htmlprocessor.git</developerConnection>
<url>https://code.onehippo.org/cms-community/hippo-services-htmlprocessor</url>
</scm>
<repositories>
<repository>
<id>hippo</id>
<name>Hippo Maven 2</name>
<url>https://maven.onehippo.com/maven2/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<updatePolicy>never</updatePolicy>
</releases>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-repository-api</artifactId>
<version>${hippo.repository.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-services</artifactId>
<version>${hippo.services.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>${sf.htmlcleaner.version}</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>${commons-lang.version}</version>
</dependency>
<!-- test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<version>${easymock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-repository-testutils</artifactId>
<version>${hippo.repository.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<profile>
<id>pedantic</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<inherited>false</inherited>
<configuration>
<excludes combine.children="append">
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import org.onehippo.cms7.services.processor.html.visit.TagVisitor;
/**
 * Contract for components that transform an HTML string in two directions:
 * {@link #read(String, List)} and {@link #write(String, List)}.
 *
 * <p>The interface extends {@link Serializable}, so implementations are expected to be
 * serializable.</p>
 */
public interface HtmlProcessor extends Serializable {

    /**
     * Processes HTML in the "read" direction.
     *
     * @param html         the HTML text to process
     * @param readVisitors visitors offered to the implementation for this pass; how (and whether)
     *                     they are used is up to the implementation
     * @return the processed HTML
     * @throws IOException if the implementation fails while producing the result
     */
    String read(String html, List<TagVisitor> readVisitors) throws IOException;

    /**
     * Processes HTML in the "write" direction.
     *
     * @param html          the HTML text to process
     * @param writeVisitors visitors offered to the implementation for this pass; how (and whether)
     *                      they are used is up to the implementation
     * @return the processed HTML
     * @throws IOException if the implementation fails while producing the result
     */
    String write(String html, List<TagVisitor> writeVisitors) throws IOException;
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;
import org.hippoecm.repository.util.JcrUtils;
import org.onehippo.cms7.services.processor.html.filter.Element;
import org.onehippo.cms7.services.processor.html.serialize.HtmlSerializer;
/**
 * Mutable settings of a single HTML processor.
 *
 * <p>A new instance starts with the built-in defaults (charset {@code UTF-8}, no filtering,
 * comments kept, line-ending conversion enabled, the {@code SIMPLE} serializer and no whitelist).
 * The values can be replaced one by one through the setters, or all at once from a repository
 * node through {@link #reconfigure(Node)}.</p>
 */
public class HtmlProcessorConfig implements Serializable {

    private static final String DEFAULT_CHARSET = "UTF-8";
    private static final boolean DEFAULT_FILTER = false;
    private static final boolean DEFAULT_OMIT_COMMENTS = false;
    private static final boolean DEFAULT_CONVERT_LINE_ENDINGS = true;
    private static final HtmlSerializer DEFAULT_SERIALIZER = HtmlSerializer.SIMPLE;

    // repository property names
    private static final String CHARSET = "charset";
    private static final String OMIT_COMMENTS = "omitComments";
    private static final String CONVERT_LINE_ENDINGS = "convertLineEndings";
    private static final String SERIALIZER = "serializer";
    private static final String FILTER = "filter";
    private static final String ATTRIBUTES = "attributes";

    private String charset;
    private HtmlSerializer serializer;
    private boolean omitComments;
    private boolean filter;
    private boolean convertLineEndings;
    private List<Element> whitelistElements;

    /**
     * Creates a configuration holding the default values; the whitelist is left unset
     * ({@code null}).
     */
    public HtmlProcessorConfig() {
        charset = DEFAULT_CHARSET;
        filter = DEFAULT_FILTER;
        convertLineEndings = DEFAULT_CONVERT_LINE_ENDINGS;
        serializer = DEFAULT_SERIALIZER;
        omitComments = DEFAULT_OMIT_COMMENTS;
    }

    /**
     * Replaces all settings with the values stored on the given node. Properties that are absent
     * fall back to the defaults. Every child node of {@code node} contributes one whitelist
     * element.
     *
     * <p>The whitelist is always rebuilt: when the node has no child nodes it is reset to
     * {@code null}, the same state a freshly constructed instance has, instead of keeping the
     * elements of an earlier call or of {@link #setWhitelistElements(List)}.</p>
     *
     * @param node the repository node holding the processor configuration
     * @throws RepositoryException if reading from the repository fails
     */
    public void reconfigure(final Node node) throws RepositoryException {
        charset = JcrUtils.getStringProperty(node, CHARSET, DEFAULT_CHARSET);
        convertLineEndings = JcrUtils.getBooleanProperty(node, CONVERT_LINE_ENDINGS, DEFAULT_CONVERT_LINE_ENDINGS);
        filter = JcrUtils.getBooleanProperty(node, FILTER, DEFAULT_FILTER);
        omitComments = JcrUtils.getBooleanProperty(node, OMIT_COMMENTS, DEFAULT_OMIT_COMMENTS);

        final String serializerName = JcrUtils.getStringProperty(node, SERIALIZER, DEFAULT_SERIALIZER.name());
        serializer = HtmlSerializer.valueOfOrDefault(serializerName);

        // Do not let a whitelist from a previous configuration survive a reconfigure.
        whitelistElements = node.hasNodes() ? readWhitelistElements(node) : null;
    }

    /**
     * Builds one whitelist element per child node of {@code node}. The element name is the part
     * of the child node name after its last {@code '.'} (or the whole name when it contains no
     * dot); the allowed attributes come from the child's multi-valued {@code attributes} property
     * and default to none.
     */
    private static List<Element> readWhitelistElements(final Node node) throws RepositoryException {
        final String[] noAttributes = new String[0];
        final List<Element> elements = new ArrayList<>();

        final NodeIterator filters = node.getNodes();
        while (filters.hasNext()) {
            final Node filterNode = filters.nextNode();
            final String[] attributes = JcrUtils.getMultipleStringProperty(filterNode, ATTRIBUTES, noAttributes);
            final String configName = filterNode.getName();
            final int offset = configName.lastIndexOf('.');
            final String elementName = offset != -1 ? configName.substring(offset + 1) : configName;
            elements.add(Element.create(elementName, attributes));
        }
        return elements;
    }

    public void setSerializer(final HtmlSerializer serializer) {
        this.serializer = serializer;
    }

    public HtmlSerializer getSerializer() {
        return serializer;
    }

    public void setCharset(final String charset) {
        this.charset = charset;
    }

    public String getCharset() {
        return charset;
    }

    public void setOmitComments(final boolean omitComments) {
        this.omitComments = omitComments;
    }

    public boolean isOmitComments() {
        return omitComments;
    }

    public void setFilter(final boolean filter) {
        this.filter = filter;
    }

    public boolean isFilter() {
        return filter;
    }

    public void setWhitelistElements(final List<Element> whitelistElements) {
        this.whitelistElements = whitelistElements;
    }

    /**
     * @return the whitelist elements, or {@code null} when none have been configured
     */
    public List<Element> getWhitelistElements() {
        return whitelistElements;
    }

    public boolean isConvertLineEndings() {
        return convertLineEndings;
    }

    public void setConvertLineEndings(final boolean convertLineEndings) {
        this.convertLineEndings = convertLineEndings;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.onehippo.cms7.services.HippoServiceRegistry;
import org.onehippo.cms7.services.processor.html.visit.TagVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Serializable supplier of an {@link HtmlProcessor}.
 *
 * <p>{@link #of(String)} creates a factory that looks the processor up through the
 * {@link HtmlProcessorService} registered in the {@link HippoServiceRegistry} each time
 * {@link #getProcessor()} is invoked, and hands out {@link #NOOP} when the lookup fails.</p>
 */
public interface HtmlProcessorFactory extends Serializable {

    Logger log = LoggerFactory.getLogger(HtmlProcessorFactory.class);

    String RICHTEXT_PROCESSOR_SERVICE = "richtext";
    String FORMATTED_HTML_PROCESSOR_SERVICE = "formatted";
    String DEFAULT_HTML_PROCESSOR_SERVICE = "no-filter";

    String DEPRECATED_FORMATTED_HTMLCLEANER_ID = "org.hippoecm.frontend.plugins.richtext.DefaultHtmlCleanerService";
    String DEPRECATED_RICHTEXT_HTMLCLEANER_ID = "org.hippoecm.frontend.plugins.richtext.IHtmlCleanerService";

    /** Processor that hands the HTML back untouched and never consults the visitors. */
    HtmlProcessor NOOP = new HtmlProcessor() {
        @Override
        public String read(final String html, final List<TagVisitor> unused) throws IOException {
            return html;
        }

        @Override
        public String write(final String html, final List<TagVisitor> unused) throws IOException {
            return html;
        }
    };

    /**
     * @return the processor supplied by this factory
     */
    HtmlProcessor getProcessor();

    /**
     * Creates a factory for the processor with the given id. The id is normalised with
     * {@link #parseProcessorId(String)} and resolved on every {@link #getProcessor()} call, not
     * when the factory is created.
     *
     * @param id the configured processor id; may be blank or one of the deprecated ids
     * @return a factory returning the resolved processor, or {@link #NOOP} when either the
     *         service or the processor is unavailable
     */
    static HtmlProcessorFactory of(final String id) {
        return () -> {
            final String resolvedId = parseProcessorId(id);
            final HtmlProcessorService registry = HippoServiceRegistry.getService(HtmlProcessorService.class);
            if (registry != null) {
                final HtmlProcessor found = registry.getHtmlProcessor(resolvedId);
                if (found != null) {
                    return found;
                }
                log.warn("Could not load HtmlProcessor with id '{}', returning NOOP HtmlProcessor", resolvedId);
                return NOOP;
            }
            log.warn("Could not load HtmlProcessorService, returning NOOP HtmlProcessor");
            return NOOP;
        };
    }

    /**
     * Normalises a configured processor id: a blank id becomes
     * {@link #DEFAULT_HTML_PROCESSOR_SERVICE}, the two deprecated html cleaner ids are mapped to
     * their replacements (with a warning), and any other id is returned as is.
     *
     * @param id the configured processor id, possibly blank
     * @return the id to use for the processor lookup
     */
    static String parseProcessorId(String id) {
        if (StringUtils.isBlank(id)) {
            log.info("CKEditor plugin does not have a server-side HTML processor configured, using default");
            return DEFAULT_HTML_PROCESSOR_SERVICE;
        }

        switch (id) {
            case DEPRECATED_RICHTEXT_HTMLCLEANER_ID:
                log.warn("HtmlProcessor id '{}' has been replaced by '{}', please update the configuration.",
                        DEPRECATED_RICHTEXT_HTMLCLEANER_ID, RICHTEXT_PROCESSOR_SERVICE);
                return RICHTEXT_PROCESSOR_SERVICE;
            case DEPRECATED_FORMATTED_HTMLCLEANER_ID:
                log.warn("HtmlProcessor id '{}' has been replaced by '{}', please update the configuration.",
                        DEPRECATED_FORMATTED_HTMLCLEANER_ID, FORMATTED_HTML_PROCESSOR_SERVICE);
                return FORMATTED_HTML_PROCESSOR_SERVICE;
            default:
                return id;
        }
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import javax.jcr.RepositoryException;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.Serializer;
import org.htmlcleaner.TagNode;
import org.onehippo.cms7.services.processor.html.filter.HtmlFilter;
import org.onehippo.cms7.services.processor.html.filter.WhitelistHtmlFilter;
import org.onehippo.cms7.services.processor.html.serialize.HtmlSerializerFactory;
import org.onehippo.cms7.services.processor.html.util.StringUtil;
import org.onehippo.cms7.services.processor.html.visit.Tag;
import org.onehippo.cms7.services.processor.html.visit.TagVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link HtmlProcessor} that parses HTML with htmlcleaner, lets {@link TagVisitor}s inspect every
 * node, optionally applies a whitelist filter (on write only) and serializes the result again.
 * Its behaviour is driven by the {@link HtmlProcessorConfig} passed to the constructor.
 */
public class HtmlProcessorImpl implements HtmlProcessor {

    public static final Logger log = LoggerFactory.getLogger(HtmlProcessorImpl.class);

    private final HtmlProcessorConfig config;

    // HtmlProcessor is Serializable. The helpers below are derived from the config by init(),
    // so they are kept out of the serialized form and rebuilt in readObject.
    // NOTE(review): htmlcleaner's HtmlCleaner/Serializer are not expected to be Serializable - confirm.
    private transient HtmlCleaner parser;
    private transient HtmlFilter filter;
    private transient Serializer serializer;

    /**
     * @param config the settings to build the parser, filter and serializer from
     */
    public HtmlProcessorImpl(final HtmlProcessorConfig config) {
        this.config = config;
        init();
    }

    /** Creates the parser, whitelist filter and serializer from the current config. */
    private void init() {
        final CleanerProperties properties = new CleanerProperties();
        properties.setOmitHtmlEnvelope(true);
        properties.setOmitXmlDeclaration(true);
        properties.setOmitComments(config.isOmitComments());

        parser = new HtmlCleaner(properties);
        filter = new WhitelistHtmlFilter(config.getWhitelistElements());
        serializer = HtmlSerializerFactory.create(config.getSerializer(), properties);
    }

    /** Restores the transient helpers after the config has been deserialized. */
    private void readObject(final java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        init();
    }

    /**
     * Parses the HTML, calls {@link TagVisitor#visitBeforeRead} on every visitor for every node,
     * and serializes the result. When line-ending conversion is enabled the output is passed
     * through {@link StringUtil#convertLfToCrlf}.
     *
     * @param html     the HTML to process; {@code null} is treated as an empty string
     * @param visitors the visitors to call; may be {@code null} or empty
     * @return the serialized, trimmed HTML
     * @throws IOException if serialization fails
     */
    @Override
    public String read(final String html, final List<TagVisitor> visitors) throws IOException {
        final TagNode node = parse(html);
        visit(node, visitors, TagVisitor::visitBeforeRead);

        String serialized = serialize(node);
        if (config.isConvertLineEndings()) {
            serialized = StringUtil.convertLfToCrlf(serialized);
        }
        return serialized;
    }

    /**
     * Parses the HTML, applies the whitelist filter when filtering is enabled, calls
     * {@link TagVisitor#visitBeforeWrite} on every visitor for every remaining node, and
     * serializes the result. When line-ending conversion is enabled the output is passed through
     * {@link StringUtil#convertCrlfToLf}.
     *
     * @param html     the HTML to process; {@code null} is treated as an empty string
     * @param visitors the visitors to call; may be {@code null} or empty
     * @return the serialized, trimmed HTML; empty when filtering leaves no node
     * @throws IOException if serialization fails
     */
    @Override
    public String write(final String html, final List<TagVisitor> visitors) throws IOException {
        TagNode node = parse(html);
        if (config.isFilter() && filter != null) {
            node = filter.apply(node);
        }
        visit(node, visitors, TagVisitor::visitBeforeWrite);

        String serialized = serialize(node);
        if (config.isConvertLineEndings()) {
            serialized = StringUtil.convertCrlfToLf(serialized);
        }
        return serialized;
    }

    /**
     * Traverses the tree below {@code node} and applies {@code visit} to each visitor for each
     * node. Does nothing when there is no node or there are no visitors.
     */
    private void visit(final TagNode node, final List<TagVisitor> visitors, final Visit visit) {
        // serialize() accepts a null node (e.g. one returned by the filter), so must this method.
        if (node == null || visitors == null || visitors.isEmpty()) {
            return;
        }

        node.traverse((parentNode, htmlNode) -> {
            final Tag parent = Tag.from(parentNode);
            final Tag tag = Tag.from(htmlNode);
            visitors.forEach(visitor -> {
                try {
                    visit.apply(visitor, parent, tag);
                } catch (final RepositoryException e) {
                    // Best effort: a failing visitor must not abort processing of the other nodes.
                    log.info(e.getMessage(), e);
                }
            });
            return true;
        });
    }

    /** Parses the HTML into a node tree, treating {@code null} as an empty string. */
    private TagNode parse(String html) {
        if (html == null) {
            html = "";
        }
        return parser.clean(html);
    }

    /** Serializes the tree using the configured charset and trims the result. */
    private String serialize(final TagNode html) throws IOException {
        if (html == null) {
            return "";
        }
        final StringWriter writer = new StringWriter();
        serializer.write(html, writer, config.getCharset());
        return writer.getBuffer().toString().trim();
    }

    /** The visitor callback to invoke for a node: before-read or before-write. */
    @FunctionalInterface
    private interface Visit {
        void apply(final TagVisitor visitor, final Tag parent, final Tag tag) throws RepositoryException;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
import org.onehippo.cms7.services.SingletonService;
/**
 * Service that hands out {@link HtmlProcessor} instances by id.
 */
@SingletonService
public interface HtmlProcessorService {

    /**
     * Looks up the HTML processor registered under the given id.
     *
     * @param id the HTML processor id
     * @return the matching HTML processor, or {@code null} if no configuration can be found for
     *         the id
     */
    HtmlProcessor getHtmlProcessor(String id);
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Holds the {@link HtmlProcessorConfig} of every configured HTML processor, keyed by the name of
 * its configuration node, and creates the matching {@link HtmlProcessor} instances on demand.
 */
public class HtmlProcessorServiceConfig implements Serializable {

    public static final Logger log = LoggerFactory.getLogger(HtmlProcessorServiceConfig.class);

    // Start out empty instead of null so getProcessor() is safe before the first reconfigure().
    private Map<String, HtmlProcessorConfig> configs = new HashMap<>();
    private Map<String, HtmlProcessor> processors = new HashMap<>();

    /**
     * Discards all known configurations and cached processors and reads a new configuration from
     * every child node of {@code config}. If the repository fails halfway, the error is logged
     * and the configurations read so far are kept.
     *
     * @param config the node whose children each describe one HTML processor
     */
    void reconfigure(final Node config) {
        configs = new HashMap<>();
        processors = new HashMap<>();

        try {
            final NodeIterator iterator = config.getNodes();
            while (iterator.hasNext()) {
                final Node child = iterator.nextNode();
                final HtmlProcessorConfig processorConfig = new HtmlProcessorConfig();
                processorConfig.reconfigure(child);
                configs.put(child.getName(), processorConfig);
            }
        } catch (final RepositoryException e) {
            // Pass the exception along so the cause of the failure ends up in the log.
            log.error("Failed to create Html processor config", e);
        }
    }

    /**
     * Returns instance of HtmlProcessor or null if the configuration cannot be found. The
     * processor is created on first request and reused afterwards.
     *
     * @param id The HTML processor id
     * @return Instance of HTML processor
     */
    HtmlProcessor getProcessor(final String id) {
        HtmlProcessor processor = processors.get(id);
        if (processor == null) {
            final HtmlProcessorConfig processorConfig = configs.get(id);
            if (processorConfig != null) {
                processor = new HtmlProcessorImpl(processorConfig);
                processors.put(id, processor);
            }
        }
        return processor;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.processor.html;
/**
 * {@link HtmlProcessorService} that answers every lookup by delegating to a
 * {@link HtmlProcessorServiceConfig}.
 */
public class HtmlProcessorServiceImpl implements HtmlProcessorService {

    private final HtmlProcessorServiceConfig serviceConfig;

    /**
     * @param config the service configuration that supplies the processors
     */
    public HtmlProcessorServiceImpl(final HtmlProcessorServiceConfig config) {
        serviceConfig = config;
    }

    /**
     * @param id the HTML processor id
     * @return whatever {@link HtmlProcessorServiceConfig#getProcessor(String)} returns for the id
     */
    @Override
    public HtmlProcessor getHtmlProcessor(final String id) {
        final HtmlProcessor processor = serviceConfig.getProcessor(id);
        return processor;
    }
}