Commit 9622ab11 authored by Matija Obreza
Browse files

TaxonomyChecker must return a list containing the match if exact match

parent 846f616b
......@@ -22,7 +22,7 @@ package org.genesys2.gringlobal.taxonomy.component;
public class BestScore {
/** The best score. */
private double bestScore = Float.MIN_VALUE;
private double bestScore = Double.MIN_VALUE;
/**
* Gets the best score.
......
......@@ -35,7 +35,7 @@ import org.slf4j.LoggerFactory;
*/
public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {
private static final double NONMATCH_MARGIN = 0.8;
public static final double NONMATCH_MARGIN = 0.8;
/** The Constant LOG. */
private final static Logger LOG = LoggerFactory.getLogger(InMemoryTaxonomyDatabase.class);
......
......@@ -57,8 +57,8 @@ public class TaxonomyChecker {
if (StringUtils.isBlank(genus) || StringUtils.equals("Unknown", genus)) {
return Arrays.asList("Unknown");
} else if (database.containsGenus(genus)) {
LOG.debug("Database contains genus={}", genus);
return Collections.emptyList();
LOG.trace("Database contains genus={}", genus);
return Arrays.asList(genus);
} else {
LOG.debug("Database does not contain genus={}", genus);
}
......@@ -76,7 +76,7 @@ public class TaxonomyChecker {
public List<String> suggestSpecies(String genus, String species, int maxSize) {
LOG.debug("Suggesting species for genus={} species={}", genus, species);
if (database.containsSpecies(genus, species)) {
return Collections.emptyList();
return Arrays.asList(species);
}
if (database.containsGenus(genus)) {
......@@ -122,9 +122,12 @@ public class TaxonomyChecker {
* @return suggested fixes for subtaxa or empty list if there are no suggestions.
*/
public List<String> suggestSubtaxa(String genus, String species, String subtaxa, int maxSize) {
if (StringUtils.isBlank(subtaxa) || database.containsSubtaxa(genus, species, subtaxa) || !database.containsSpecies(genus, species)) {
if (StringUtils.isBlank(subtaxa) || !database.containsSpecies(genus, species)) {
return Collections.emptyList();
}
if (database.containsSubtaxa(genus, species, subtaxa)) {
return Arrays.asList(subtaxa);
}
return database.findSimilarSubtaxa(genus, species, subtaxa, maxSize);
}
......
......@@ -18,6 +18,11 @@ package org.genesys2.gringlobal.taxonomy;
import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;
import java.util.Arrays;
import org.genesys2.gringlobal.taxonomy.component.BestScore;
import org.genesys2.gringlobal.taxonomy.component.InMemoryTaxonomyDatabase;
import org.genesys2.gringlobal.taxonomy.component.StringSimilarity;
import org.genesys2.gringlobal.taxonomy.component.TaxonomyChecker;
import org.junit.AfterClass;
import org.junit.BeforeClass;
......@@ -54,11 +59,51 @@ public class TaxonomyCheckerTest extends DatabaseTest {
// System.err.println(subspecies);
// }
assertThat(DATABASE.containsSubtaxa("Cucumis", "melo", "var. conomon"), equalTo(true));
assertThat("Checker must not return a suggestion for valid subtaxa", CHECKER.suggestSubtaxa("Cucumis", "melo", "var. conomon", 1), hasSize(0));
assertThat("Checker must return the same for valid subtaxa", CHECKER.suggestSubtaxa("Cucumis", "melo", "var. conomon", 1), contains("var. conomon"));
assertThat(CHECKER.suggestSubtaxa("Cucumis", "melo", "var. conoman", 1), contains("var. conomon"));
// TODO
// assertThat("Checker must prefer the quadrinomial name when provided", CHECKER.suggestSubtaxa("Cucumis", "melo", "subsp. agrestis var. conomon", 1), contains("subsp.
// agrestis var. conomon"));
}
/**
 * The misspelling "Anthirrhinum" gives Antirrhinum; Acanthorrhinum; Anarrhinum.
 * Verifies that each of these candidates scores at least
 * {@link InMemoryTaxonomyDatabase#NONMATCH_MARGIN} times the best score among them.
 */
@Test
public void testAnthirrhinum() {
    final String misspelled = "Anthirrhinum";
    final String[] candidates = { "Antirrhinum", "Acanthorrhinum", "Anarrhinum" };

    // The lookup result is not inspected here; the call is kept so the lookup itself is still exercised.
    DATABASE.findSimilarGenus(misspelled, 5);

    // First pass: score every candidate once and track the best score.
    // NOTE(review): assumes diceCoefficientOptimized is a pure function of its arguments, so caching is safe.
    final BestScore bestScore = new BestScore();
    final double[] scores = new double[candidates.length];
    for (int i = 0; i < candidates.length; i++) {
        scores[i] = StringSimilarity.diceCoefficientOptimized(misspelled.toLowerCase(), candidates[i].toLowerCase());
        bestScore.update(scores[i]);
    }

    // Second pass: every candidate must reach the threshold derived from the best score.
    final double threshold = bestScore.getBestScore() * InMemoryTaxonomyDatabase.NONMATCH_MARGIN;
    for (int i = 0; i < candidates.length; i++) {
        assertThat(candidates[i] + " should score within the non-match margin of the best match for " + misspelled, scores[i],
            greaterThanOrEqualTo(threshold));
    }
}
/**
 * Runs the similar-genus lookup for the misspelling "Anizantha".
 * Suggestions noted for it: Anisantha; Anisacantha; Anigozanthos; Anacantha; Sannantha.
 * The returned suggestions are not asserted on.
 */
@Test
public void testAnizantha() {
    final String misspelled = "Anizantha";
    final int maxSuggestions = 5;
    DATABASE.findSimilarGenus(misspelled, maxSuggestions);
}
/**
* Aragoptera Allagoptera;Gonoptera;Ragiopteris;Isoptera;Otoptera
*/
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment