Commit 6ebdf2cf authored by Matija Obreza
Browse files

SUBTAUTHOR_check implemented (fixes #7)

parent 30650fb0
......@@ -29,6 +29,7 @@ import org.genesys2.gringlobal.taxonomy.model.SpeciesRow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// TODO: Auto-generated Javadoc
/**
* {@link List} based in-memory "database". NOT THREAD-SAFE!
*/
......@@ -281,6 +282,27 @@ public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {
.map(speciesRow -> speciesRow.getSpeciesAuthority()).orElse(null);
}
/* (non-Javadoc)
 * @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#containsSubtaxa(java.lang.String, java.lang.String, java.lang.String)
 *
 * Returns true when at least one species row of the genus has exactly the given
 * species name and exactly the given subtaxa. A null genus, species or subtaxa
 * never matches and yields false instead of a NullPointerException.
 */
@Override
public boolean containsSubtaxa(String genus, String species, String subtaxa) {
    LOG.trace("Does database contain genus={} species={} subtaxa={}", genus, species, subtaxa);
    // null arguments cannot match any row; bail out before they are dereferenced below
    if (genus == null || species == null || subtaxa == null || !genusIdLookup.containsKey(genus)) {
        return false;
    }
    return getAllGenusSpecies(genus).stream()
        // keep matching speciesRows within genus
        .filter(speciesRow -> species.equals(speciesRow.getSpeciesName()))
        // return true if one row has exact match on SUBTAXA
        .anyMatch(speciesRow -> subtaxa.equals(speciesRow.getSubtaxa()));
}
/* (non-Javadoc)
* @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#findSimilarSubtaxa(java.lang.String, java.lang.String, java.lang.String, int)
*/
@Override
public List<String> findSimilarSubtaxa(String genus, String species, String subtaxa, int maxSize) {
LOG.debug("Searching similar subtaxa for genus={} species={} subtaxa={}", genus, species, subtaxa);
......@@ -322,4 +344,27 @@ public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {
// limit results to maxSize
.limit(maxSize).collect(Collectors.toList());
}
/* (non-Javadoc)
 * @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#getSubtaxaAuthority(java.lang.String, java.lang.String, java.lang.String)
 *
 * Returns the subtaxa authority of the first species row of the genus whose
 * species name and subtaxa both match exactly. Returns null when the genus is
 * not registered, when no row matches, when the matching row has no subtaxa
 * authority, or when any argument is null.
 */
@Override
public String getSubtaxaAuthority(String genus, String species, String subtaxa) {
    // null arguments cannot match any row; bail out before they are dereferenced below
    if (genus == null || species == null || subtaxa == null || genusIdLookup.get(genus) == null) {
        return null;
    }
    return getAllGenusSpecies(genus).stream()
        // keep rows with matching species
        .filter(speciesRow -> species.equals(speciesRow.getSpeciesName()))
        // keep rows with matching subtaxa
        .filter(speciesRow -> subtaxa.equals(speciesRow.getSubtaxa()))
        // debug print
        .peek(speciesRow -> LOG.trace("Subtaxa authority {}", speciesRow.getSubtaxaAuthority()))
        // keep first match only
        .findFirst()
        // to String or null (a null authority on the row also ends up as null)
        .map(speciesRow -> speciesRow.getSubtaxaAuthority()).orElse(null);
}
}
......@@ -122,9 +122,25 @@ public class TaxonomyChecker {
* @return suggested fixes for subtaxa or empty list if there are no suggestions.
*/
public List<String> suggestSubtaxa(String genus, String species, String subtaxa, int maxSize) {
// NOTE(review): the next two lines look like the pre- and post-change versions of the
// same guard (the diff markers were lost); only one of them should remain -- confirm
// against the repository. The second variant additionally returns no suggestions when
// the database already contains an exact genus + species + subtaxa match.
if (StringUtils.isBlank(subtaxa) || !database.containsSpecies(genus, species)) {
if (StringUtils.isBlank(subtaxa) || database.containsSubtaxa(genus, species, subtaxa) || !database.containsSpecies(genus, species)) {
// nothing to suggest
return Collections.emptyList();
}
// delegate the similarity search to the database, capped at maxSize results
return database.findSimilarSubtaxa(genus, species, subtaxa, maxSize);
}
/**
 * Return authority for the genus + species + subtaxa.
 *
 * @param genus the genus
 * @param species the species
 * @param subtaxa subtaxa; a blank or <code>null</code> value never matches
 * @return subtaxa authority if there is at least one matching record in the database,
 *         <code>null</code> otherwise
 */
public String getSubtaxaAuthority(String genus, String species, String subtaxa) {
    // A blank subtaxa cannot identify a record; checking it here keeps null values
    // (e.g. an empty source cell) from reaching the database lookup.
    if (StringUtils.isBlank(subtaxa) || !database.containsSubtaxa(genus, species, subtaxa)) {
        return null;
    }
    return database.getSubtaxaAuthority(genus, species, subtaxa);
}
}
......@@ -68,6 +68,16 @@ public interface TaxonomyDatabase {
*/
String getSpeciesAuthority(String genus, String species);
/**
* Does the database contain at least one record that exactly matches the specified genus, species and subtaxa?
*
* @param genus valid genus
* @param species valid species
* @param subtaxa subtaxa to check
* @return <code>true</code> if there is at least one record, <code>false</code> otherwise
*/
boolean containsSubtaxa(String genus, String species, String subtaxa);
/**
* Suggest a list of subtaxa
*
......@@ -79,4 +89,14 @@ public interface TaxonomyDatabase {
*/
List<String> findSimilarSubtaxa(String genus, String species, String subtaxa, int maxSize);
/**
* Return the authority for the specified subtaxa.
*
* @param genus the genus
* @param species the species
* @param subtaxa subtaxa
* @return the subtaxa authority or <code>null</code> when none is available
*/
String getSubtaxaAuthority(String genus, String species, String subtaxa);
}
......@@ -91,6 +91,13 @@ public class InMemoryDatabaseTest {
return;
}
assertThat(DATABASE.containsSubtaxa("Vigna", "unguiculata", "subsp. dekindtiana"), equalTo(true));
assertThat(DATABASE.containsSubtaxa("Vigna", "unguiculata", "var. dekindtiana"), equalTo(true));
assertThat(DATABASE.findSimilarSubtaxa("Vigna", "unguiculata", "dekindtiana", 5), contains("subsp. dekindtiana", "var. dekindtiana"));
assertThat(DATABASE.containsSpecies("Vigna", "dekindtiana"), equalTo(true));
assertThat(DATABASE.containsSubtaxa("Vigna", "unguiculata", "var. protracta"), equalTo(true));
assertThat(DATABASE.getSubtaxaAuthority("Vigna", "unguiculata", "var. protracta"), equalTo("(E. Mey.) Verdc."));
}
}
......@@ -330,7 +330,26 @@ public class GGTC {
if (LOG.isTraceEnabled())
LOG.trace("SPECIES_check: {}", StringUtils.defaultIfBlank(subtaxa_check, "NULL"));
outputLine[outputHeaders.indexOf(HEADER_SUBTAXA_CHECK)] = subtaxa_check;
outputLine[outputHeaders.indexOf(HEADER_SUBTAXA_CHECK)] = StringUtils.equals(subtaxa, subtaxa_check) ? null : subtaxa_check;
}
// Fill the HEADER_SUBTAUTHOR_CHECK cell only when that column is part of the output
if (outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK) >= 0) {
// NOTE(review): unlike HEADER_SUBTAUTHOR below, these three lookups are not null-checked;
// presumably the columns are mandatory in the source -- confirm
String genus = sourceLine[sourceMapping.get(HEADER_GENUS)];
String species = sourceLine[sourceMapping.get(HEADER_SPECIES)];
String subtaxa = sourceLine[sourceMapping.get(HEADER_SUBTAXA)];
// authority reported by the taxonomy checker for this genus + species + subtaxa (may be null)
String subtauthor_check = taxonomyChecker.getSubtaxaAuthority(genus, species, subtaxa);
if (LOG.isTraceEnabled())
LOG.trace("SUBTAUTHOR_check: {}", StringUtils.defaultIfBlank(subtauthor_check, "NULL"));
// When the source has its own HEADER_SUBTAUTHOR column, leave the check cell empty (null)
// if the source value already equals the looked-up authority
if (sourceMapping.get(HEADER_SUBTAUTHOR) != null) {
String subtauthor = sourceLine[sourceMapping.get(HEADER_SUBTAUTHOR)];
outputLine[outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK)] = StringUtils.equals(subtauthor, subtauthor_check) ? null : subtauthor_check;
} else {
// no source column to compare with: always report the looked-up authority
outputLine[outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK)] = subtauthor_check;
}
}
LOG.info("Source and result:\n\n\t{}\n\t{}\n", Arrays.toString(sourceLine), Arrays.toString(outputLine));
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment