Commit 1ea729a9 authored by Ate Douma's avatar Ate Douma

TRIVIAL cleanup master

parent 248e0dc4
/*.iml
/.classpath
/.idea
/.project
/.settings
/target
This diff is collapsed.
Hippo CMS7 Services - htmlprocessor
Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
This product includes software developed by:
Hippo B.V., Amsterdam, The Netherlands (http://www.onehippo.com/);
The Apache Software Foundation (http://www.apache.org/).
NOTICE: Only our own original work is licensed under the terms of the
Apache License Version 2.0. The licenses of some libraries might impose
different redistribution or general licensing terms than those stated in the
Apache License. Users and redistributors are hereby requested to verify these
conditions and agree upon them.
# This is not the branch you're looking for...
BloomReach only provides the git trees for the release tags of Hippo CMS, as explained on https://www.onehippo.org/about/open-source-release-policy.html
To check out the code for a specific release tag after cloning this repository, use the following:
## to show the available tags
git tag
## to check out a specific tag
git checkout <tag name>
## to modify a project
If you want to make modifications to a project, for example to create a patch, create a new fork branch from the specific tag like this:
git checkout -b forked-<tag name> <tag name>
For the latter, also see the **Build from Source** documentation at https://www.onehippo.org/library/development/build-hippo-cms-from-scratch.html
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS"
BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-cms7-project</artifactId>
<version>29.4-SNAPSHOT</version>
</parent>
<name>Hippo CMS7 Services - htmlprocessor</name>
<artifactId>hippo-services-htmlprocessor</artifactId>
<version>1.3.0-SNAPSHOT</version>
<inceptionYear>2017</inceptionYear>
<properties>
<!-- use root project name for all project modules NOTICE files, should be the same as in the root NOTICE file -->
<notice.project.name>Hippo CMS7 Services - htmlprocessor</notice.project.name>
<!-- runtime dependencies -->
<hippo.repository.version>5.3.0-SNAPSHOT</hippo.repository.version>
<hippo.services.version>4.3.0-SNAPSHOT</hippo.services.version>
<commons-lang.version>2.6</commons-lang.version>
<sf.htmlcleaner.version>2.21</sf.htmlcleaner.version>
<!-- test dependencies -->
<easymock.version>3.0</easymock.version>
<junit.version>4.11</junit.version>
</properties>
<scm>
<connection>scm:git:https://code.onehippo.org/cms-community/hippo-services-htmlprocessor.git</connection>
<developerConnection>scm:git:git@code.onehippo.org:cms-community/hippo-services-htmlprocessor.git</developerConnection>
<url>https://code.onehippo.org/cms-community/hippo-services-htmlprocessor</url>
</scm>
<repositories>
<!-- Hippo Maven repository: snapshot artifacts are not resolved from it (snapshots disabled), and with the release updatePolicy set to "never" Maven does not re-check it for updates of releases that are already available locally. -->
<repository>
<id>hippo</id>
<name>Hippo Maven 2</name>
<url>https://maven.onehippo.com/maven2/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<updatePolicy>never</updatePolicy>
</releases>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-repository-api</artifactId>
<version>${hippo.repository.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-services</artifactId>
<version>${hippo.services.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>${sf.htmlcleaner.version}</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>${commons-lang.version}</version>
</dependency>
<!-- test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<version>${easymock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.onehippo.cms7</groupId>
<artifactId>hippo-repository-testutils</artifactId>
<version>${hippo.repository.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<!-- The "pedantic" profile configures the Apache RAT (license header audit) plugin. The plugin declaration is marked inherited=false, so it is not passed on to child modules. The excludes list is empty in this module; combine.children="append" means any entries placed here are added to the excludes coming from the parent configuration rather than replacing them. -->
<profile>
<id>pedantic</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<inherited>false</inherited>
<configuration>
<excludes combine.children="append">
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;
import org.hippoecm.repository.util.JcrUtils;
import org.onehippo.cms7.services.htmlprocessor.filter.Element;
import org.onehippo.cms7.services.htmlprocessor.serialize.HtmlSerializer;
/**
 * Settings of a single HTML processor.
 * <p>
 * A new instance starts with the built-in defaults (charset {@code UTF-8}, no filtering, comments kept,
 * {@code javascript:} protocol omitted, line-ending conversion enabled, {@link HtmlSerializer#SIMPLE}) and no
 * whitelist ({@code null}). The settings can be changed through the setters or loaded from a JCR node with
 * {@link #reconfigure(Node)}.
 */
public class HtmlProcessorConfig implements Serializable {

    private static final String DEFAULT_CHARSET = "UTF-8";
    private static final boolean DEFAULT_FILTER = false;
    private static final boolean DEFAULT_OMIT_COMMENTS = false;
    private static final boolean DEFAULT_OMIT_JAVASCRIPT_PROTOCOL = true;
    private static final boolean DEFAULT_CONVERT_LINE_ENDINGS = true;
    private static final HtmlSerializer DEFAULT_SERIALIZER = HtmlSerializer.SIMPLE;

    // repository property names
    private static final String CHARSET = "charset";
    private static final String OMIT_COMMENTS = "omitComments";
    private static final String OMIT_JAVASCRIPT_PROTOCOL = "omitJavascriptProtocol";
    private static final String CONVERT_LINE_ENDINGS = "convertLineEndings";
    private static final String SERIALIZER = "serializer";
    private static final String FILTER = "filter";
    private static final String ATTRIBUTES = "attributes";

    private String charset;
    private HtmlSerializer serializer;
    private boolean omitComments;
    private boolean omitJavascriptProtocol;
    private boolean filter;
    private boolean convertLineEndings;
    private List<Element> whitelistElements;

    /**
     * Creates a configuration holding the default settings; the whitelist is left unset ({@code null}).
     */
    public HtmlProcessorConfig() {
        charset = DEFAULT_CHARSET;
        filter = DEFAULT_FILTER;
        convertLineEndings = DEFAULT_CONVERT_LINE_ENDINGS;
        serializer = DEFAULT_SERIALIZER;
        omitComments = DEFAULT_OMIT_COMMENTS;
        omitJavascriptProtocol = DEFAULT_OMIT_JAVASCRIPT_PROTOCOL;
    }

    /**
     * Replaces all settings with the values stored on the given node. Properties that are missing fall back to
     * their defaults. Every child node of {@code node} contributes one whitelisted element; when the node has no
     * children the whitelist is reset to its initial unset state, so no whitelist of an earlier configuration
     * lingers around.
     *
     * @param node the JCR node holding the processor configuration
     * @throws RepositoryException if reading the node, its properties or its children fails
     */
    public void reconfigure(final Node node) throws RepositoryException {
        charset = JcrUtils.getStringProperty(node, CHARSET, DEFAULT_CHARSET);
        convertLineEndings = JcrUtils.getBooleanProperty(node, CONVERT_LINE_ENDINGS, DEFAULT_CONVERT_LINE_ENDINGS);
        filter = JcrUtils.getBooleanProperty(node, FILTER, DEFAULT_FILTER);
        omitComments = JcrUtils.getBooleanProperty(node, OMIT_COMMENTS, DEFAULT_OMIT_COMMENTS);
        omitJavascriptProtocol = JcrUtils.getBooleanProperty(node, OMIT_JAVASCRIPT_PROTOCOL, DEFAULT_OMIT_JAVASCRIPT_PROTOCOL);

        final String serializerName = JcrUtils.getStringProperty(node, SERIALIZER, DEFAULT_SERIALIZER.name());
        serializer = HtmlSerializer.valueOfOrDefault(serializerName);

        // Same value a freshly constructed config has when there is nothing to whitelist.
        whitelistElements = node.hasNodes() ? readWhitelistElements(node) : null;
    }

    /**
     * Builds the whitelist from the child nodes of the given configuration node. The element name is the part of
     * the child node name after the last '.', or the complete name when it contains no '.'. The allowed
     * attributes are read from the multi-valued "attributes" property, defaulting to none.
     */
    private static List<Element> readWhitelistElements(final Node node) throws RepositoryException {
        final String[] emptyAttr = new String[]{};
        final List<Element> elements = new ArrayList<>();
        final NodeIterator filters = node.getNodes();
        while (filters.hasNext()) {
            final Node filterNode = filters.nextNode();
            final String[] attributes = JcrUtils.getMultipleStringProperty(filterNode, ATTRIBUTES, emptyAttr);
            final String configName = filterNode.getName();
            final int offset = configName.lastIndexOf('.');
            final String elementName = offset != -1 ? configName.substring(offset + 1) : configName;
            elements.add(Element.create(elementName, attributes));
        }
        return elements;
    }

    public void setSerializer(final HtmlSerializer serializer) {
        this.serializer = serializer;
    }

    public HtmlSerializer getSerializer() {
        return serializer;
    }

    public void setCharset(final String charset) {
        this.charset = charset;
    }

    public String getCharset() {
        return charset;
    }

    public void setOmitComments(final boolean omitComments) {
        this.omitComments = omitComments;
    }

    public boolean isOmitComments() {
        return omitComments;
    }

    public void setFilter(final boolean filter) {
        this.filter = filter;
    }

    public boolean isFilter() {
        return filter;
    }

    public void setWhitelistElements(final List<Element> whitelistElements) {
        this.whitelistElements = whitelistElements;
    }

    /**
     * @return the whitelisted elements, or {@code null} when no whitelist has been configured
     */
    public List<Element> getWhitelistElements() {
        return whitelistElements;
    }

    public boolean isConvertLineEndings() {
        return convertLineEndings;
    }

    public void setConvertLineEndings(final boolean convertLineEndings) {
        this.convertLineEndings = convertLineEndings;
    }

    public boolean isOmitJavascriptProtocol() {
        return omitJavascriptProtocol;
    }

    public void setOmitJavascriptProtocol(final boolean omitJsProtocol) {
        this.omitJavascriptProtocol = omitJsProtocol;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.onehippo.cms7.services.HippoServiceRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Serializable factory that hands out an {@link HtmlProcessor}. Use {@link #of(String)} to obtain a factory that
 * looks the processor up in the {@link HtmlProcessorService} each time {@link #getProcessor()} is called.
 */
public interface HtmlProcessorFactory extends Serializable {

    Logger log = LoggerFactory.getLogger(HtmlProcessorFactory.class);

    String RICHTEXT_PROCESSOR_SERVICE = "richtext";
    String FORMATTED_HTML_PROCESSOR_SERVICE = "formatted";
    String DEFAULT_HTML_PROCESSOR_SERVICE = "no-filter";

    String DEPRECATED_FORMATTED_HTMLCLEANER_ID = "org.hippoecm.frontend.plugins.richtext.DefaultHtmlCleanerService";
    String DEPRECATED_RICHTEXT_HTMLCLEANER_ID = "org.hippoecm.frontend.plugins.richtext.IHtmlCleanerService";

    /**
     * Processor that returns the HTML it is given untouched and never consults the visitors.
     */
    HtmlProcessor NOOP = new HtmlProcessor() {
        @Override
        public String read(final String html, final List<TagVisitor> visitors) throws IOException {
            return html;
        }

        @Override
        public String write(final String html, final List<TagVisitor> visitors) throws IOException {
            return html;
        }
    };

    /**
     * @return the HTML processor supplied by this factory
     */
    HtmlProcessor getProcessor();

    /**
     * Creates a factory for the processor with the given id. The id is first normalized with
     * {@link #parseProcessorId(String)}; when either the {@link HtmlProcessorService} or the processor itself is
     * unavailable a warning is logged and {@link #NOOP} is returned instead.
     *
     * @param id the configured processor id, may be blank or one of the deprecated ids
     * @return a factory resolving the processor lazily on each {@link #getProcessor()} call
     */
    static HtmlProcessorFactory of(final String id) {
        return () -> {
            final String resolvedId = parseProcessorId(id);
            final HtmlProcessorService registered = HippoServiceRegistry.getService(HtmlProcessorService.class);
            if (registered != null) {
                final HtmlProcessor found = registered.getHtmlProcessor(resolvedId);
                if (found != null) {
                    return found;
                }
                log.warn("Could not load HtmlProcessor with id '{}', returning NOOP HtmlProcessor", resolvedId);
                return NOOP;
            }
            log.warn("Could not load HtmlProcessorService, returning NOOP HtmlProcessor");
            return NOOP;
        };
    }

    /**
     * Normalizes a configured processor id: a blank id maps to {@link #DEFAULT_HTML_PROCESSOR_SERVICE}, the two
     * deprecated html cleaner ids map to their replacement (with a warning), any other id is returned as is.
     *
     * @param id the configured processor id
     * @return the id to look the processor up with
     */
    static String parseProcessorId(final String id) {
        if (StringUtils.isBlank(id)) {
            log.info("CKEditor plugin does not have a server-side HTML processor configured, using default");
            return DEFAULT_HTML_PROCESSOR_SERVICE;
        }

        final String replacement;
        switch (id) {
            case DEPRECATED_RICHTEXT_HTMLCLEANER_ID:
                replacement = RICHTEXT_PROCESSOR_SERVICE;
                break;
            case DEPRECATED_FORMATTED_HTMLCLEANER_ID:
                replacement = FORMATTED_HTML_PROCESSOR_SERVICE;
                break;
            default:
                // not a deprecated id, use it unchanged
                return id;
        }

        log.warn("HtmlProcessor id '{}' has been replaced by '{}', please update the configuration.",
                id, replacement);
        return replacement;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import javax.jcr.RepositoryException;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.Serializer;
import org.htmlcleaner.TagNode;
import org.onehippo.cms7.services.htmlprocessor.filter.HtmlFilter;
import org.onehippo.cms7.services.htmlprocessor.filter.WhitelistHtmlFilter;
import org.onehippo.cms7.services.htmlprocessor.serialize.HtmlSerializerFactory;
import org.onehippo.cms7.services.htmlprocessor.util.StringUtil;
import org.onehippo.cms7.services.htmlprocessor.visit.HtmlTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link HtmlProcessor} that parses HTML with HtmlCleaner, lets the supplied {@link TagVisitor}s inspect every
 * node, optionally applies a whitelist filter when writing, and serializes the result again.
 */
public class HtmlProcessorImpl implements HtmlProcessor {

    public static final Logger log = LoggerFactory.getLogger(HtmlProcessorImpl.class);

    private final HtmlProcessorConfig config;
    private final HtmlCleaner parser;
    private final HtmlFilter filter;
    private final Serializer serializer;

    /**
     * Sets up parser, filter and serializer according to the given configuration.
     *
     * @param config the processor settings
     */
    public HtmlProcessorImpl(final HtmlProcessorConfig config) {
        final CleanerProperties cleanerProperties = new CleanerProperties();
        cleanerProperties.setOmitComments(config.isOmitComments());
        cleanerProperties.setOmitXmlDeclaration(true);
        cleanerProperties.setOmitHtmlEnvelope(true);

        this.config = config;
        this.parser = new HtmlCleaner(cleanerProperties);
        this.filter = new WhitelistHtmlFilter(config.getWhitelistElements(), config.isOmitJavascriptProtocol());
        this.serializer = HtmlSerializerFactory.create(config.getSerializer(), cleanerProperties);
    }

    /**
     * Parses the HTML, notifies the visitors through {@link TagVisitor#onRead} and serializes the tree. When
     * line-ending conversion is enabled the result is passed through {@code StringUtil.convertLfToCrlf}.
     */
    @Override
    public String read(final String html, final List<TagVisitor> visitors) throws IOException {
        final TagNode root = parse(html);
        visit(root, visitors, TagVisitor::onRead);

        final String markup = serialize(root);
        return config.isConvertLineEndings() ? StringUtil.convertLfToCrlf(markup) : markup;
    }

    /**
     * Parses the HTML, notifies the visitors through {@link TagVisitor#onWrite}, applies the filter when
     * filtering is enabled and serializes the tree. When line-ending conversion is enabled the result is passed
     * through {@code StringUtil.convertCrlfToLf}.
     */
    @Override
    public String write(final String html, final List<TagVisitor> visitors) throws IOException {
        final TagNode parsed = parse(html);
        visit(parsed, visitors, TagVisitor::onWrite);

        final boolean filtering = config.isFilter() && filter != null;
        final TagNode result = filtering ? filter.apply(parsed) : parsed;

        final String markup = serialize(result);
        return config.isConvertLineEndings() ? StringUtil.convertCrlfToLf(markup) : markup;
    }

    /**
     * Walks the tree and hands every node, together with its parent, to each visitor. All visitors get a
     * {@code before()} call ahead of the traversal and an {@code after()} call once it is done. Nothing happens
     * when there are no visitors.
     */
    private void visit(final TagNode node, final List<TagVisitor> visitors, final Visit visit) {
        if (visitors == null || visitors.isEmpty()) {
            return;
        }

        visitors.forEach(TagVisitor::before);

        node.traverse((parentNode, htmlNode) -> {
            final Tag parent = HtmlTag.from(parentNode);
            final Tag tag = HtmlTag.from(htmlNode);
            for (final TagVisitor visitor : visitors) {
                try {
                    visit.apply(visitor, parent, tag);
                } catch (final RepositoryException e) {
                    // a failing visitor must not stop the others, nor the traversal
                    log.info(e.getMessage(), e);
                }
            }
            // always continue with the next node
            return true;
        });

        visitors.forEach(TagVisitor::after);
    }

    /**
     * Parses the given HTML; {@code null} is parsed as the empty string.
     */
    private TagNode parse(String html) {
        return parser.clean(html == null ? "" : html);
    }

    /**
     * Serializes the tree using the configured charset and trims the result; a {@code null} tree yields "".
     */
    private String serialize(final TagNode html) throws IOException {
        if (html == null) {
            return "";
        }

        final StringWriter out = new StringWriter();
        serializer.write(html, out, config.getCharset());
        return out.toString().trim();
    }

    /**
     * Callback selecting which {@link TagVisitor} method is invoked for a node.
     */
    @FunctionalInterface
    private interface Visit {
        void apply(TagVisitor visitor, Tag parent, Tag tag) throws RepositoryException;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Holds the configuration of all HTML processors, keyed by processor id, and creates the matching
 * {@link HtmlProcessor} instances on demand.
 */
public class HtmlProcessorServiceConfig implements Serializable {

    public static final Logger log = LoggerFactory.getLogger(HtmlProcessorServiceConfig.class);

    private final Map<String, HtmlProcessorConfig> configs = new HashMap<>();
    private final Map<String, HtmlProcessor> processors = new HashMap<>();

    // NOTE(review): both maps are plain HashMaps mutated by reconfigure() and getProcessor() without
    // synchronization - confirm that callers never invoke these concurrently.

    /**
     * Drops all known configurations and cached processors and reads a new configuration for every child node
     * of the given node; the child node name becomes the processor id. When reading fails the error is logged
     * and the configurations read so far are kept.
     *
     * @param config the node whose children each describe one HTML processor
     */
    void reconfigure(final Node config) {
        configs.clear();
        processors.clear();

        try {
            final NodeIterator iterator = config.getNodes();
            while (iterator.hasNext()) {
                final Node child = iterator.nextNode();
                final HtmlProcessorConfig processorConfig = new HtmlProcessorConfig();
                processorConfig.reconfigure(child);
                configs.put(child.getName(), processorConfig);
            }
        } catch (final RepositoryException e) {
            // pass the exception along so the cause and stack trace end up in the log
            log.error("Failed to create Html processor config", e);
        }
    }

    /**
     * Returns instance of HtmlProcessor or null if the configuration cannot be found. The processor is created
     * on first request and reused until the next {@link #reconfigure(Node)}.
     *
     * @param id The HTML processor id
     * @return Instance of HTML processor
     */
    HtmlProcessor getProcessor(final String id) {
        // a null result from the mapping function leaves the cache untouched and makes this method return null
        return processors.computeIfAbsent(id, key -> {
            final HtmlProcessorConfig processorConfig = configs.get(key);
            return processorConfig != null ? new HtmlProcessorImpl(processorConfig) : null;
        });
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor;
/**
 * {@link HtmlProcessorService} that answers every lookup from an {@link HtmlProcessorServiceConfig}.
 */
public class HtmlProcessorServiceImpl implements HtmlProcessorService {

    private final HtmlProcessorServiceConfig serviceConfig;

    /**
     * @param config the configuration that lookups are delegated to
     */
    public HtmlProcessorServiceImpl(final HtmlProcessorServiceConfig config) {
        serviceConfig = config;
    }

    /**
     * Delegates to {@code HtmlProcessorServiceConfig.getProcessor(id)} and returns its result unchanged.
     *
     * @param id the HTML processor id
     */
    @Override
    public HtmlProcessor getHtmlProcessor(final String id) {
        final HtmlProcessor processor = serviceConfig.getProcessor(id);
        return processor;
    }
}
/*
* Copyright 2017 Hippo B.V. (http://www.onehippo.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onehippo.cms7.services.htmlprocessor;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import org.onehippo.cms7.services.HippoServiceRegistry;
import org.onehippo.repository.modules.AbstractReconfigurableDaemonModule;
import org.onehippo.repository.modules.ProvidesService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;