Commit abce00d4 authored by Matija Obreza
Browse files

Added #levenshteinCoefficient

parent 9b2eb2ff
......@@ -5,6 +5,8 @@ package org.genesys2.gringlobal.taxonomy.component;
import java.util.Arrays;
import org.apache.commons.lang3.StringUtils;
/**
* Code from
* <ul>
......@@ -81,4 +83,8 @@ public class StringSimilarity {
}
return (double) matches / (n + m);
}
/**
 * Returns a similarity score derived from the Levenshtein edit distance between
 * two strings, normalized by the length of the longer string:
 * {@code 1 - distance / max(a.length(), b.length())}.
 *
 * <p>Identical strings score {@code 1.0}. Two empty strings are treated as
 * identical and also score {@code 1.0}; without that guard the normalization
 * would divide zero by zero and yield {@code NaN}.
 *
 * <p>Null handling is delegated to
 * {@link StringUtils#getLevenshteinDistance(CharSequence, CharSequence)}, which is
 * invoked before either length is read.
 *
 * @param a the first string to compare
 * @param b the second string to compare
 * @return the normalized similarity; {@code 1.0} means the strings are equal
 */
public static double getLevenshteinCoefficient(String a, String b) {
	// Compute the distance first so invalid (null) input fails exactly as before.
	final int distance = StringUtils.getLevenshteinDistance(a, b);
	final int longest = Math.max(a.length(), b.length());
	if (longest == 0) {
		// Both inputs are empty: they are equal, and dividing by zero would give NaN.
		return 1.0;
	}
	return 1.0 - ((double) distance / longest);
}
}
......@@ -65,4 +65,18 @@ public class DistanceTest {
}
}
/**
 * Manual diagnostic for the name "Anizantha": prints, for each candidate name
 * (Anizantha itself, Anisantha, Anisanthan, Anisacantha, Anigozanthos, Anacantha,
 * Sannantha), the optimized Dice coefficient, the Levenshtein distance and the
 * Levenshtein coefficient against "Anizantha" to {@code System.err}.
 * Afterwards prints the Levenshtein distance and coefficient of the
 * single-character strings "a" and "b" against a run of ten 'a' characters.
 */
// TODO Relates to https://gitlab.croptrust.org/genesys-pgr/taxonomy-tools/issues/16
// @Test
public void diceCoeffAnizantha() {
	final String[] candidates = { "Anizantha", "Anisantha", "Anisanthan", "Anisacantha", "Anigozanthos", "Anacantha", "Sannantha" };
	// The first entry doubles as the reference every candidate is compared to.
	final String reference = candidates[0];
	for (int i = 0; i < candidates.length; i++) {
		final String candidate = candidates[i];
		final double dice = StringSimilarity.diceCoefficientOptimized(reference, candidate);
		final int distance = StringUtils.getLevenshteinDistance(reference, candidate);
		final double coefficient = StringSimilarity.getLevenshteinCoefficient(reference, candidate);
		System.err.println("'" + reference + "' '" + candidate + "' d=" + dice + " lev=" + distance + " lc=" + coefficient);
	}

	// Extreme length mismatch: one matching character vs. one non-matching character.
	final String longRun = "aaaaaaaaaa";
	for (String single : new String[] { "a", "b" }) {
		final int distance = StringUtils.getLevenshteinDistance(single, longRun);
		final double coefficient = StringSimilarity.getLevenshteinCoefficient(single, longRun);
		System.err.println(distance + " lc=" + coefficient);
	}
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment