Commit ce99f729 authored by Matija Obreza
Browse files

Javadocs

parent e4fdfe33
......@@ -23,19 +23,26 @@ import java.util.Arrays;
* Does not handle digits [0-9] for obvious reasons.
*/
public class MostFrequentKChars {
/**
* Get the hash for an input string with at most K most frequent characters.
*
* <pre>
String function MostFreqKHashing (String inputString, int K)
def string outputString
for each distinct character
count occurrence of each character
for i := 0 to K
char c = next most freq ith character (if two chars have same frequency then get the first occurrence in inputString)
int count = number of occurrence of the character
append to outputString, c and count
end for
return outputString
* String function MostFreqKHashing (String inputString, int K)
* def string outputString
* for each distinct character
* count occurrence of each character
* for i := 0 to K
* char c = next most freq ith character (if two chars have same frequency then get the first occurrence in inputString)
* int count = number of occurrence of the character
* append to outputString, c and count
* end for
* return outputString
* </pre>
*
* @param string the input string to hash
* @param k the maximum number of most frequent characters to include in the hash
* @return the most frequent k hash
*/
public static String getMostFrequentKHash(String string, int k) {
return toHashString(calculateHash(string, k));
......@@ -45,10 +52,10 @@ public class MostFrequentKChars {
* Generate the hash as int[]. Array contains the character (cast to int) followed by the frequency.
*
* Every 2nd element is the character.
*
*
* @param string input string
* @param k limit result to k most frequent characters
* @return
* @return the hash array: each character (cast to int) followed by its frequency
*/
static int[] calculateHash(String string, int k) {
char[] input = string.toCharArray();
......@@ -95,31 +102,35 @@ public class MostFrequentKChars {
}
/**
* Calculate the similarity of the two hashes
* Calculate the similarity of the two hashes.
*
* @param hash1 the first hash in its String form (e.g. "a10b8c7")
* @param hash2 the second hash in its String form
* @return the similarity of the two hashes
*/
public static int getMostFreqKSimilarity(String hash1, String hash2) {
    // Decode the String hashes (first, then second) and delegate to the int[] overload.
    final int[] firstDecoded = decodeHash(hash1);
    final int[] secondDecoded = decodeHash(hash2);
    return getMostFreqKSimilarity(firstDecoded, secondDecoded);
}
/**
* Calculate the similarity of the two hashes
* Calculate the similarity of the two hashes.
*
* <pre>
int function MostFreqKSimilarity (String inputStr1, String inputStr2, int limit)
def int similarity
for each c = next character from inputStr1
lookup c in inputStr2
if c is null
continue
// similarity += frequency of c in inputStr1
similarity += frequency of c in inputStr1 + frequency of c in inputStr2
// return limit - similarity
return similarity
* int function MostFreqKSimilarity (String inputStr1, String inputStr2, int limit)
* def int similarity
* for each c = next character from inputStr1
* lookup c in inputStr2
* if c is null
* continue
* // similarity += frequency of c in inputStr1
* similarity += frequency of c in inputStr1 + frequency of c in inputStr2
* // return limit - similarity
* return similarity
* </pre>
*
* @param hash1
* @param hash2
* @return
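* @param hash1 the first hash as an int[] of character/frequency pairs
* @param hash2 the second hash as an int[] of character/frequency pairs
* @return the similarity of the two hashes, as described by the pseudocode above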
*/
public static int getMostFreqKSimilarity(int[] hash1, int[] hash2) {
int similarity = 0;
......@@ -140,15 +151,31 @@ public class MostFrequentKChars {
}
/**
* Wrapper function.
*
* <pre>
int function MostFreqKSDF (string inputStr1, string inputStr2, int K, int maxDistance)
return maxDistance - MostFreqKSimilarity(MostFreqKHashing(inputStr1,K), MostFreqKHashing(inputStr2,K))
* int function MostFreqKSDF (string inputStr1, string inputStr2, int K, int maxDistance)
* return maxDistance - MostFreqKSimilarity(MostFreqKHashing(inputStr1,K), MostFreqKHashing(inputStr2,K))
* </pre>
*
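* @param inputStr1 the first input string
* @param inputStr2 the second input string
* @param K the maximum number of most frequent characters used for each hash
* @param maxDistance the value from which the similarity is subtracted
* @return maxDistance minus the similarity of the hashes of the two input strings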
*/
public static int mostFreqKSDF(String inputStr1, String inputStr2, int K, int maxDistance) {
    // Hash each input with the same K, first input first.
    final int[] firstHash = calculateHash(inputStr1, K);
    final int[] secondHash = calculateHash(inputStr2, K);
    // The result is the similarity of the two hashes subtracted from maxDistance.
    final int similarity = getMostFreqKSimilarity(firstHash, secondHash);
    return maxDistance - similarity;
}
/**
* Most freq ksdf.
*
* @param inputStr1 the first input string
* @param inputStr2 the second input string
* @param K the maximum number of most frequent characters used for each hash
* @return the double
*/
public static double mostFreqKSDF(String inputStr1, String inputStr2, int K) {
int[] hash1 = calculateHash(inputStr1, K);
int[] hash2 = calculateHash(inputStr2, K);
......@@ -156,8 +183,8 @@ public class MostFrequentKChars {
}
/**
* Get the sum of frequencies of all chars represented in the hash
*
* Get the sum of frequencies of all chars represented in the hash.
*
* @param hash the hash
* @return sum of character frequencies
*/
......@@ -172,26 +199,26 @@ public class MostFrequentKChars {
}
/**
* Find frequency of char c in h2
* Find frequency of char c in hash.
*
* @param h2
* @param c
* @param hash the hash
* @param c character to find
* @return frequency, or -1 if char not found
*/
// NOTE(review): this listing appears to interleave the pre-rename (h2) and post-rename (hash)
// lines of a diff without +/- markers, so the signature, loop header, read and return each
// show up twice — confirm against the actual file before relying on it.
private static int findFrequency(int[] h2, char c) {
for (int i = 0; i < h2.length; i++) {
// Read the element at i as a character, then step i onto the element that follows it.
char c2 = (char) h2[i++];
private static int findFrequency(int[] hash, char c) {
// The loop header's i++ plus the i++ in the read below advance i by two per iteration.
for (int i = 0; i < hash.length; i++) {
// Read the element at i as a character, then step i onto the element that follows it.
char c2 = (char) hash[i++];
if (c == c2)
// i now indexes the element right after the matched character; that value is returned.
return h2[i];
return hash[i];
}
// No element read as a character matched c.
return -1;
}
/**
* Convert the hash formatted string "a10b8c7" to int[]
*
* @param hash1
* @return
* Convert the hash formatted string "a10b8c7" to int[].
*
* @param hash1 the hash formatted string to decode
* @return the decoded hash as int[]
*/
static int[] decodeHash(String hash1) {
int[] h = new int[hash1.length()];
......@@ -213,8 +240,8 @@ public class MostFrequentKChars {
}
/**
* Encode a hash array to String
*
* Encode a hash array to String.
*
* @param h1 hash array as generated
* @return String representation of the hash array (e.g. "i3b2")
*/
......
......@@ -117,7 +117,7 @@ public class TaxonomyChecker {
*
* @param genus must be valid genus in the database
* @param species species must be valid species within genus
* @param species current subtaxa, must not be null or blank
* @param subtaxa current subtaxa, must not be null or blank
* @param maxSize maximum number of suggestions to return
* @return suggested fixes for subtaxa or empty list if there are no suggestions.
*/
......
......@@ -107,7 +107,7 @@ public interface TaxonomyDatabase {
* @param genus valid genus
* @param species valid species
* @param maxSize maximum number of rows to return
* @return
* @return list of species matching genus and species name
*/
List<SpeciesRow> listSpecies(String genus, String species, int maxSize);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment