Commit 0899d8cc authored by Matija Obreza
Browse files

Column definitions and initial validators

parent 6934842e
package org.genesys2.anno.model;
import java.util.HashSet;
import java.util.Set;
/**
 * Definition of a single column: its preferred name, RDF term, description,
 * data type, the {@code unique} and {@code multiple} flags, an optional
 * pattern string and the set of validators attached to the column.
 * <p>
 * This is a plain mutable bean; every property has a getter and a setter.
 */
public class Column {

    /** Validators attached to this column; starts out as an empty set. */
    private Set<ColumnValidator> validators = new HashSet<ColumnValidator>();

    /** Preferred (canonical) name of the column. */
    private String preferredName;

    /** RDF term identifying the column. */
    private String rdfTerm;

    /** Free-text description of the column. */
    private String description;

    /** Data type of the values held by the column. */
    private ColumnDataType dataType;

    /** Pattern string for the column; {@code null} unless set explicitly. */
    private String pattern;

    /** Uniqueness flag; exact semantics are defined by consumers of the model. */
    private boolean unique;

    /** Multiplicity flag; exact semantics are defined by consumers of the model. */
    private boolean multiple;

    /**
     * Creates an empty column definition. All reference properties are
     * {@code null}, both flags are {@code false} and the validator set is empty.
     */
    public Column() {
    }

    /**
     * Creates a column definition with the most commonly used properties.
     * The description and pattern are left {@code null}.
     *
     * @param preferredName preferred name of the column
     * @param dataType      data type of the column values
     * @param multiple      value of the {@code multiple} flag
     * @param unique        value of the {@code unique} flag
     * @param rdfTerm       RDF term identifying the column
     */
    public Column(String preferredName, ColumnDataType dataType,
            boolean multiple, boolean unique, String rdfTerm) {
        this.rdfTerm = rdfTerm;
        this.unique = unique;
        this.multiple = multiple;
        this.dataType = dataType;
        this.preferredName = preferredName;
    }

    /** @return the preferred name of the column */
    public String getPreferredName() {
        return this.preferredName;
    }

    /** @param preferredName the preferred name to set */
    public void setPreferredName(String preferredName) {
        this.preferredName = preferredName;
    }

    /** @return the RDF term of the column */
    public String getRdfTerm() {
        return this.rdfTerm;
    }

    /** @param rdfTerm the RDF term to set */
    public void setRdfTerm(String rdfTerm) {
        this.rdfTerm = rdfTerm;
    }

    /** @return the description of the column */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description to set */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the data type of the column */
    public ColumnDataType getDataType() {
        return this.dataType;
    }

    /** @param dataType the data type to set */
    public void setDataType(ColumnDataType dataType) {
        this.dataType = dataType;
    }

    /** @return the value of the {@code unique} flag */
    public boolean isUnique() {
        return this.unique;
    }

    /** @param unique the new value of the {@code unique} flag */
    public void setUnique(boolean unique) {
        this.unique = unique;
    }

    /** @return the value of the {@code multiple} flag */
    public boolean isMultiple() {
        return this.multiple;
    }

    /** @param multiple the new value of the {@code multiple} flag */
    public void setMultiple(boolean multiple) {
        this.multiple = multiple;
    }

    /**
     * Returns the internal validator set itself (not a copy), so callers can
     * register validators by adding to the returned set.
     *
     * @return the validators attached to this column
     */
    public Set<ColumnValidator> getValidators() {
        return this.validators;
    }

    /** @param validators the validator set to use from now on */
    public void setValidators(Set<ColumnValidator> validators) {
        this.validators = validators;
    }

    /** @return the pattern string of the column, possibly {@code null} */
    public String getPattern() {
        return this.pattern;
    }

    /** @param pattern the pattern string to set */
    public void setPattern(String pattern) {
        this.pattern = pattern;
    }
}
package org.genesys2.anno.model;
/**
 * Data types that can be assigned to a column definition.
 * <p>
 * The declaration order is kept stable in case any consumer depends on it.
 */
public enum ColumnDataType {
    TEXT,
    DATE,
    DATETIME,
    // NOTE(review): presumably an FAO WIEWS institute code - confirm.
    WIEWSINSTCODE,
    INTEGER,
    DOUBLE
}
package org.genesys2.anno.model;
/**
 * Validates a single value of a single column.
 */
public interface ColumnValidator {

    /**
     * Checks whether {@code value} is acceptable for {@code column}.
     *
     * @param column the definition of the column the value belongs to
     * @param value  the raw value to check
     * @return {@code true} if the value is valid, {@code false} otherwise
     */
    boolean isValid(Column column, String value);
}
package org.genesys2.anno.model;
/**
 * RDF term constants used to identify MCPD columns.
 */
public class RdfMCPD {

    /** RDF term of the institute code column. */
    public static final String INSTCODE = "http://purl/instCode";

    /** RDF term of the accession number column. */
    public static final String ACCENUMB = "http://purl/acceNumb";

    // NOTE(review): the two URLs below look like placeholders - confirm the
    // real terms before relying on them.

    /** RDF term of the genus column. */
    public static final String GENUS = "http://some.url";

    /** RDF term of the species column. */
    public static final String SPECIES = "http://some.other.url";
}
package org.genesys2.anno.model;
/**
 * Validates a whole row, i.e. values spanning multiple columns at once.
 *
 * @author matijaobreza
 */
public interface RowValidator {

    /**
     * Checks whether the given row is valid.
     *
     * @param columns the column definitions of the row
     * @param values  the values of the row; {@code values[i]} is matched with
     *                {@code columns[i]}
     * @return {@code true} if the row is valid, {@code false} otherwise
     */
    boolean isValid(Column[] columns, String[] values);
}
package org.genesys2.anno.predefined;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.genesys2.anno.model.Column;
import org.genesys2.anno.model.ColumnDataType;
import org.genesys2.anno.model.RdfMCPD;
import org.genesys2.anno.validator.FaoWiewsInstCodeValidator;
/**
 * Predefined set of MCPD column definitions: INSTCODE, ACCENUMB, GENUS and
 * SPECIES. The set is built once in the constructor and exposed read-only.
 */
public class MCPD {

    /** Unmodifiable view of the predefined column definitions. */
    private final Set<Column> columns;

    /**
     * Builds the predefined column definitions. All of them are single-valued
     * text columns; only ACCENUMB is flagged unique, and INSTCODE additionally
     * gets a {@link FaoWiewsInstCodeValidator}.
     */
    public MCPD() {
        final Set<Column> definitions = new HashSet<Column>();

        final Column instituteCode = textColumn("INSTCODE", false, RdfMCPD.INSTCODE);
        instituteCode.getValidators().add(new FaoWiewsInstCodeValidator());
        definitions.add(instituteCode);

        definitions.add(textColumn("ACCENUMB", true, RdfMCPD.ACCENUMB));
        definitions.add(textColumn("GENUS", false, RdfMCPD.GENUS));
        definitions.add(textColumn("SPECIES", false, RdfMCPD.SPECIES));

        this.columns = Collections.unmodifiableSet(definitions);
    }

    /** Creates a single-valued TEXT column with the given name, unique flag and RDF term. */
    private static Column textColumn(String name, boolean unique, String rdfTerm) {
        return new Column(name, ColumnDataType.TEXT, false, unique, rdfTerm);
    }

    /**
     * @return the predefined column definitions as an unmodifiable set
     */
    public final Set<Column> getColumns() {
        return this.columns;
    }
}
package org.genesys2.anno.validator;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.genesys2.anno.model.Column;
import org.genesys2.anno.model.ColumnValidator;
/**
 * Validates the format of FAO WIEWS institute codes: a three-letter ISO3 code
 * followed by at least three digits, for example {@code NGA039}. Blank values
 * are considered valid. The value is not trimmed, so surrounding whitespace
 * makes a code invalid.
 *
 * @author matijaobreza
 *
 */
public class FaoWiewsInstCodeValidator implements ColumnValidator {

    /**
     * Three ASCII letters followed by three or more digits. Compiled once
     * instead of on every call. Letters are matched explicitly because
     * {@code \w} would also accept digits and underscores in the ISO3 part
     * (e.g. "123456"). Letter case is deliberately not enforced.
     */
    private static final Pattern INSTCODE_FORMAT = Pattern.compile("[A-Za-z]{3}\\d{3,}");

    /**
     * Checks the format of an institute code.
     *
     * @param column the column definition; not used by this validator
     * @param value  the institute code to check
     * @return {@code true} if {@code value} is blank or matches the expected
     *         format, {@code false} otherwise
     */
    @Override
    public boolean isValid(Column column, String value) {
        if (StringUtils.isBlank(value)) {
            return true;
        }
        // TODO FIXME Should check against actual FAO WIEWS institute codes!
        // matches() requires the whole value to match, so no anchors are needed.
        return INSTCODE_FORMAT.matcher(value).matches();
    }
}
package org.genesys2.anno.validator;
import org.apache.commons.lang3.StringUtils;
import org.genesys2.anno.model.Column;
import org.genesys2.anno.model.RdfMCPD;
import org.genesys2.anno.model.RowValidator;
/**
 * Row validator that requires both a genus and a species value: the row is
 * valid only when it has a column with the {@link RdfMCPD#GENUS} term and a
 * column with the {@link RdfMCPD#SPECIES} term, each carrying a non-blank
 * value.
 */
public class GenusSpeciesValidator implements RowValidator {

    /**
     * Checks that the row has non-blank genus and species values.
     *
     * @param columns the column definitions of the row
     * @param values  the row values, positionally matched with {@code columns}
     * @return {@code true} if either argument is {@code null}, or if both the
     *         genus and the species column are present with non-blank values;
     *         {@code false} otherwise
     */
    @Override
    public boolean isValid(Column[] columns, String[] values) {
        if (columns == null || values == null) {
            return true;
        }

        String genusValue = findValue(columns, values, RdfMCPD.GENUS);
        // The species lookup is independent of the genus lookup; it must run
        // even when a genus value has already been found.
        String speciesValue = findValue(columns, values, RdfMCPD.SPECIES);

        // A non-blank value implies that the matching column exists.
        return StringUtils.isNotBlank(genusValue)
                && StringUtils.isNotBlank(speciesValue);
    }

    /**
     * Returns the first non-null value of a column carrying the given RDF
     * term, or {@code null} when there is no such column or value.
     */
    private static String findValue(Column[] columns, String[] values, String rdfTerm) {
        for (int i = 0; i < columns.length; i++) {
            Column column = columns[i];
            // Constant-first equals: columns without an RDF term are skipped
            // instead of causing a NullPointerException.
            if (column == null || !rdfTerm.equals(column.getRdfTerm())) {
                continue;
            }
            // The values array may be shorter than the columns array.
            if (i < values.length && values[i] != null) {
                return values[i];
            }
        }
        return null;
    }
}
package org.genesys2.anno.validator;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.genesys2.anno.model.Column;
import org.genesys2.anno.model.ColumnValidator;
/**
 * Column validator that matches values against the regular expression stored
 * in {@link Column#getPattern()}. Compiled patterns are cached per expression
 * string. Blank values, a {@code null} column and blank patterns always
 * validate.
 *
 * @author matijaobreza
 */
public class RegexpValidator implements ColumnValidator {

    /** Compiled patterns keyed by their expression string. */
    private final HashMap<String, Pattern> patternCache = new HashMap<String, Pattern>(
            100);

    /**
     * Checks {@code value} against the pattern of {@code column}.
     *
     * @param column the column definition supplying the pattern; may be
     *               {@code null}
     * @param value  the value to check
     * @return {@code true} if there is nothing to check (blank value,
     *         {@code null} column or blank pattern) or if the whole value
     *         matches the pattern; {@code false} otherwise
     */
    @Override
    public boolean isValid(Column column, String value) {
        // Nothing to validate: no value, no column, or no pattern.
        if (StringUtils.isBlank(value) || column == null
                || StringUtils.isBlank(column.getPattern())) {
            return true;
        }
        return compiledPattern(column.getPattern()).matcher(value).matches();
    }

    /**
     * Returns the compiled pattern for {@code expression}, compiling and
     * caching it on first use. Synchronized because the cache is a plain
     * {@link HashMap}.
     */
    private synchronized Pattern compiledPattern(String expression) {
        if (!patternCache.containsKey(expression)) {
            patternCache.put(expression, Pattern.compile(expression));
        }
        return patternCache.get(expression);
    }
}
package org.genesys2.anno.validator;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.genesys2.anno.model.ColumnValidator;
import org.junit.Test;
/**
 * Tests the format validation of FAO WIEWS institute codes performed by
 * {@link FaoWiewsInstCodeValidator}.
 *
 * @author matijaobreza
 *
 */
public class FaoWiewsInstCodeTest {

    /** Validator under test; JUnit creates a fresh test instance per test method. */
    private final ColumnValidator validator = new FaoWiewsInstCodeValidator();

    /** Null and blank values are accepted. */
    @Test
    public void testBlanks() {
        // Null is valid
        assertTrue(validator.isValid(null, null));
        // Blanks are valid
        assertTrue(validator.isValid(null, " "));
    }

    /**
     * The validator does not trim its input: callers must trim values first,
     * otherwise leading or trailing whitespace makes the code invalid.
     */
    @Test
    public void testTrim() {
        // leading and trailing space
        assertFalse(validator.isValid(null, " NGA03911 "));
        // already trimmed
        assertTrue(validator.isValid(null, "NGA03911"));
        // trailing space
        assertFalse(validator.isValid(null, "NGA03911 "));
        // trailing tab
        assertFalse(validator.isValid(null, "NGA03911\t"));
    }

    /** Only three characters followed by at least three digits are accepted. */
    @Test
    public void testFormat() {
        // Too short, wrong order or too few digits
        assertFalse(validator.isValid(null, "123"));
        assertFalse(validator.isValid(null, "USA"));
        assertFalse(validator.isValid(null, "1234USA"));
        assertFalse(validator.isValid(null, "USA1"));
        assertFalse(validator.isValid(null, "AVQ12"));
        // Exactly three digits is the minimum
        assertTrue(validator.isValid(null, "NGA039"));
        // Embedded whitespace is not allowed
        assertFalse(validator.isValid(null, "NGA 039"));
        // More than three digits is fine
        assertTrue(validator.isValid(null, "NGA0391"));
        assertTrue(validator.isValid(null, "NGA03911"));
    }
}
package org.genesys2.anno.validator;
import static org.junit.Assert.assertTrue;
import org.genesys2.anno.model.RowValidator;
import org.junit.Test;
/**
 * Tests for {@link GenusSpeciesValidator}.
 *
 * @author matijaobreza
 *
 */
public class GenusSpeciesValidatorTest {

    /** A row without column definitions and values is considered valid. */
    @Test
    public void testBlankValue() {
        final RowValidator rowValidator = new GenusSpeciesValidator();
        final boolean valid = rowValidator.isValid(null, null);
        assertTrue(valid);
    }
}
package org.genesys2.anno.validator;
import static org.junit.Assert.assertTrue;
import org.genesys2.anno.model.Column;
import org.genesys2.anno.model.ColumnValidator;
import org.junit.Test;
/**
 * Tests for {@link RegexpValidator}: blank values, a null column and blank
 * patterns must always validate.
 *
 * @author matijaobreza
 *
 */
public class RegexpTest {

    /** Validator under test; JUnit creates a fresh test instance per test method. */
    private final ColumnValidator validator = new RegexpValidator();

    /** Null and whitespace-only values validate regardless of column and pattern. */
    @Test
    public void testBlankValue() {
        // No column at all
        assertTrue(validator.isValid(null, null));
        assertTrue(validator.isValid(null, " "));
        assertTrue(validator.isValid(null, " \r\t\n\t "));

        // Column without a pattern
        assertTrue(validator.isValid(new Column(), " \r\t\n\t "));

        // Column with a pattern the blank value would not match
        Column withPattern = new Column();
        withPattern.setPattern("\\w+");
        assertTrue(validator.isValid(withPattern, " \r\t\n\t "));
    }

    /** Without a column there is no pattern, so any value validates. */
    @Test
    public void testNullColumn() {
        assertTrue(validator.isValid(null, "askjdha asdkljhas laksj1 12312"));
    }

    /** Null and whitespace-only patterns accept any value. */
    @Test
    public void testBlankPattern() {
        Column patternless = new Column();

        patternless.setPattern(null);
        assertTrue(validator.isValid(patternless, "askjdha asdkljhas laksj1 12312"));

        patternless.setPattern(" ");
        assertTrue(validator.isValid(patternless, "askjdha asdkljhas laksj1 12312"));
    }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment