Commit ecf4fc8e authored by Matija Obreza

HtmlSanitizer using owasp-java-html-sanitizer

parent efeb54f7
......@@ -348,6 +348,11 @@
<artifactId>tomcat-jdbc</artifactId>
<version>7.0.42</version>
</dependency>
<!-- OWASP Java HTML Sanitizer: provides the org.owasp.html policy classes used by OWASPSanitizer -->
<dependency>
<groupId>com.googlecode.owasp-java-html-sanitizer</groupId>
<artifactId>owasp-java-html-sanitizer</artifactId>
<version>r209</version>
</dependency>
</dependencies>
<build>
......
......@@ -23,6 +23,8 @@ import org.genesys2.server.model.BusinessModel;
import org.genesys2.server.model.impl.ActivityPost;
import org.genesys2.server.model.impl.Article;
import org.genesys2.server.model.impl.ClassPK;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
public interface ContentService {
......@@ -52,4 +54,7 @@ public interface ContentService {
*/
Article getGlobalArticle(String slug, Locale locale);
/**
 * Lists stored articles one page at a time.
 *
 * @param pageable the page index and size to fetch
 * @return the requested page of articles
 */
Page<Article> listArticles(Pageable pageable);
/**
 * Persists the given articles.
 *
 * @param articles the articles to store
 */
void save(Iterable<Article> articles);
}
/**
* Copyright 2013 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.genesys2.server.service;
/**
 * HTML sanitizer: turns a string of HTML markup into a sanitized string.
 * What is kept or removed is decided by the implementation.
 *
 * @author mobreza
 */
public interface HtmlSanitizer {
/**
 * Sanitizes the given HTML markup.
 *
 * @param html the markup to sanitize; NOTE(review): whether {@code null} is
 *            accepted is up to the implementation (OWASPSanitizer passes
 *            {@code null} through unchanged)
 * @return the sanitized markup
 */
String sanitize(String html);
}
package org.genesys2.server.service.impl;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.genesys2.server.model.impl.Article;
import org.genesys2.server.service.ContentService;
import org.genesys2.server.service.HtmlSanitizer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Component;
/**
 * Maintenance component that runs the body of every stored {@link Article}
 * through the configured {@link HtmlSanitizer} and saves the result.
 */
@Component
public class ContentSanitizer {
    public static final Log LOG = LogFactory.getLog(ContentSanitizer.class);

    @Autowired
    private ContentService contentService;

    @Autowired
    private HtmlSanitizer htmlSanitizer;

    /**
     * Walks all articles in pages of 10, replaces each article body with its
     * sanitized form and saves every page before requesting the next one.
     * Restricted to callers holding the ADMINISTRATOR role.
     */
    @PreAuthorize("hasRole('ADMINISTRATOR')")
    public void sanitizeAll() {
        LOG.info("Sanitizing articles");

        // NOTE(review): the page request carries no sort order; confirm the
        // repository returns a stable ordering while rows are being updated.
        for (int pageIndex = 0;; pageIndex++) {
            final Page<Article> batch = contentService.listArticles(new PageRequest(pageIndex, 10));

            for (Article article : batch) {
                final String cleanBody = htmlSanitizer.sanitize(article.getBody());
                article.setBody(cleanBody);
            }

            // The batch is saved even when it is empty, then we stop on the last page.
            contentService.save(batch.getContent());
            if (!batch.hasNextPage()) {
                break;
            }
        }
    }
}
......@@ -30,7 +30,9 @@ import org.genesys2.server.persistence.domain.ArticleRepository;
import org.genesys2.server.persistence.domain.ClassPKRepository;
import org.genesys2.server.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort.Direction;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
......@@ -55,6 +57,17 @@ public class ContentServiceImpl implements ContentService {
return postRepository.findAll(page).getContent();
}
/**
 * Fetches one page of articles from the article repository.
 *
 * @param pageable the page index and size to fetch
 * @return the page returned by the repository
 */
@Override
public Page<Article> listArticles(Pageable pageable) {
    final Page<Article> requestedPage = articleRepository.findAll(pageable);
    return requestedPage;
}
/**
 * Stores the given articles through the article repository.
 * Explicitly marked as a read-write transaction.
 *
 * @param articles the articles to persist
 */
@Override
@Transactional(readOnly = false)
public void save(Iterable<Article> articles) {
articleRepository.save(articles);
}
@Override
public Article getGlobalArticle(String slug, Locale locale) {
return getArticle(Article.class, null, slug, locale);
......
/**
* Copyright 2013 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.genesys2.server.service.impl;
import java.util.regex.Pattern;
import org.genesys2.server.service.HtmlSanitizer;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.PolicyFactory;
import org.springframework.stereotype.Service;
/**
 * HTML sanitizer using owasp-java-html-sanitizer.
 *
 * <p>
 * All input is filtered through a single whitelist policy: only the listed
 * elements and attributes survive, everything else is dropped by the library.
 * </p>
 *
 * @author mobreza
 *
 */
@Service
public class OWASPSanitizer implements HtmlSanitizer {

    /**
     * Whitelist policy shared by all calls. It is built once and never
     * reassigned, hence a true constant ({@code static final}) rather than a
     * per-instance mutable field.
     */
    private static final PolicyFactory POLICY = new HtmlPolicyBuilder()
            .allowStandardUrlProtocols()
            // Allow title attr
            .allowAttributes("title")
            .globally()
            // Href on links
            .allowAttributes("href")
            .onElements("a")
            // Defeat link spammers.
            .requireRelNofollowOnLinks()
            // Allow lang= with an alphabetic value on any element.
            .allowAttributes("lang")
            .matching(Pattern.compile("[a-zA-Z]{2,20}"))
            .globally()
            // Align: only the listed values (matched ignoring case) on p and table
            .allowAttributes("align")
            .matching(true, "center", "left", "right", "justify", "char")
            .onElements("p", "table")
            // Elements
            .allowElements("table", "thead", "tbody", "tr", "td", "th", "tfoot", "a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong", "br", "ul",
                    "ol", "li", "h2", "h3", "h4", "small")
            // Get factory
            .toFactory();

    /**
     * Applies the whitelist policy to the given markup.
     *
     * @param html the markup to sanitize, may be {@code null}
     * @return the sanitized markup, or {@code null} when {@code html} is
     *         {@code null}
     */
    @Override
    public String sanitize(String html) {
        return html == null ? null : POLICY.sanitize(html);
    }
}
......@@ -23,6 +23,7 @@ import org.apache.commons.logging.LogFactory;
import org.genesys2.server.service.GenesysService;
import org.genesys2.server.service.GeoService;
import org.genesys2.server.service.LuceneIndexer;
import org.genesys2.server.service.impl.ContentSanitizer;
import org.genesys2.server.service.impl.InstituteUpdater;
import org.genesys2.server.service.impl.SGSVInsertMissing;
import org.genesys2.server.service.impl.SGSVUpdate;
......@@ -55,6 +56,9 @@ public class AdminController {
@Autowired
SGSVInsertMissing sgsvImporter;
@Autowired
ContentSanitizer contentSanitizer;
@RequestMapping("/")
public String root() {
return "/admin/index";
......@@ -115,4 +119,12 @@ public class AdminController {
sgsvImporter.importMissingSGSV();
return "redirect:/admin/";
}
/**
 * Handles POST requests to {@code /sanitize} (relative to this controller's
 * mapping): sanitizes all content via {@code contentSanitizer} and then
 * redirects to the admin index.
 *
 * @return redirect to {@code /admin/}
 */
@RequestMapping(value = "/sanitize", method = RequestMethod.POST)
public String sanitize() {
    LOG.info("Sanitizing content");
    contentSanitizer.sanitizeAll();
    LOG.info("Sanitizing content.. Done");

    final String adminHome = "redirect:/admin/";
    return adminHome;
}
}
......@@ -35,6 +35,10 @@
<input type="submit" value="Update accession institute info" />
</form>
<%-- Posts to /admin/sanitize, which runs the HTML sanitizer over stored content --%>
<form method="post" action="<c:url value="/admin/sanitize" />">
<input type="submit" value="Sanitize HTML content" />
</form>
<form method="post" action="<c:url value="/admin/reindexEntity" />">
<select name="entity">
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment