Commit 78d58890 authored by Matija Obreza's avatar Matija Obreza
Browse files

Revised decimal mark handling

parent 3433c4a3
......@@ -17,10 +17,10 @@ package org.genesys.taxonomy.checker.web.service;
import java.io.IOException;
import java.io.Reader;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.ParseException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.genesys.taxonomy.checker.TaxonomyException;
......@@ -47,24 +47,18 @@ public interface ProcessService {
List<String[]> process(Reader reader, Character separator, Character quoteChar, Character escapeChar, String validateType, Boolean toCurrentTaxa, Character decimalMark)
throws ParseException, IOException, TaxonomyException;
/**
* Method for parsing numeric values(for example DECLATITUDE and DECLONGITUDE values)
* @param number string number for parsing
* @param decimalMark decimal mark
* @return parsed value
* @throws RuntimeException if parsing is wrong
* Create instance of DecimalFormat used to parse numbers from the CSV
* @param decimalMark
* @return DecimalFormat using the specified decimal mark
*/
default String parseNumber(String number, Character decimalMark){
StringBuffer sBuffer = new StringBuffer();
Pattern p = Pattern.compile("\\d*[" + decimalMark + "]\\d*");
Matcher m = p.matcher(number);
while (m.find()) {
sBuffer.append(m.group());
}
if (sBuffer.toString().length() == 0) {
throw new RuntimeException("You set wrong 'Decimal mark': '" + decimalMark + "' or wrong numeric value in table: " + number);
}
return sBuffer.toString();
/**
 * Creates the {@link DecimalFormat} used to parse numbers from the CSV.
 *
 * <p>The symbols are derived from {@code Locale.ROOT} instead of the JVM default locale, so
 * that everything except the two separators configured here (minus sign, digits, ...) is the
 * same on every server. With default-locale symbols a locale whose minus sign is not the
 * ASCII hyphen would reject input such as {@code -12.5}.
 *
 * <p>Note: {@code DecimalFormat} is not synchronized; do not use one instance from several
 * threads concurrently without external synchronization.
 *
 * @param decimalMark character that separates the integer part from the fraction part
 * @return a new DecimalFormat using the specified decimal mark; the grouping separator is
 *         {@code '.'} when the decimal mark is {@code ','} and {@code ','} otherwise
 */
default DecimalFormat createDecimalFormat(char decimalMark) {
    final DecimalFormatSymbols symbols = new DecimalFormatSymbols(java.util.Locale.ROOT);
    symbols.setDecimalSeparator(decimalMark);
    // The grouping separator must differ from the decimal mark, otherwise parsing is ambiguous.
    symbols.setGroupingSeparator(decimalMark == ',' ? '.' : ',');
    // Explicit, locale-independent pattern (equivalent to the en-US default number pattern).
    return new DecimalFormat("#,##0.###", symbols);
}
}
......@@ -2,6 +2,7 @@ package org.genesys.taxonomy.checker.web.service.impl;
import java.io.IOException;
import java.io.Reader;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
......@@ -10,12 +11,9 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import com.opencsv.CSVReader;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
......@@ -29,6 +27,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.opencsv.CSVReader;
/**
* @author Andrey Lugovskoy.
*/
......@@ -48,6 +48,8 @@ public class CountryProcessServiceImpl implements ProcessService {
private List<String[]> execute(Reader readerr, char separatorChar, char quoteChar, char escapeChar, Character decimalMark) throws IOException {
final DecimalFormat decimalFormat = createDecimalFormat(decimalMark);
final List<String[]> lines = new LinkedList<>();
try (CSVReader reader = new CSVReader(readerr, separatorChar, quoteChar, escapeChar, 0, false)) {
......@@ -120,7 +122,7 @@ public class CountryProcessServiceImpl implements ProcessService {
if (inputs.size() == 0)
break;
Future<?> future = threadPool.submit(new Runnable() {
threadPool.submit(new Runnable() {
@Override
public void run() {
for (String[] nextLine : inputs) {
......@@ -131,14 +133,15 @@ public class CountryProcessServiceImpl implements ProcessService {
// if ("101346".equals(nextLine[0])) {
// LOG.info("!!! Got what we're looking for");
// }
String declongitude = parseNumber(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LONGITUDE)], decimalMark);
String declatitude = parseNumber(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LATITUDE)], decimalMark);
String declongitude = StringUtils.trimToNull(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LONGITUDE)]);
String declatitude = StringUtils.trimToNull(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LATITUDE)]);
String origCty = nextLine[sourceMapping.get(ApplicationUtils.HEADER_ORIGCTY)].trim();
if (StringUtils.isNotBlank(declatitude) && StringUtils.isNotBlank(declongitude)) {
try {
float longitude = Float.parseFloat(declongitude);
float latitude = Float.parseFloat(declatitude);
float longitude = decimalFormat.parse(declongitude).floatValue();
float latitude = decimalFormat.parse(declatitude).floatValue();
LOG.debug("Parsed DECLATITUDE/DECLONGITUDE ({}, {}) --> ({}, {})", declongitude, declatitude, longitude, latitude);
try {
result = countryOfOriginService.getCountries(longitude, latitude, origCty, ApplicationUtils.ALLOWED_DISTANCE_MARGIN);
......@@ -164,12 +167,6 @@ public class CountryProcessServiceImpl implements ProcessService {
}
});
try {
future.get();
} catch (Exception e) {
throw new IOException(e);
}
} while (true);
LOG.info("Shutting down");
......
......@@ -2,6 +2,7 @@ package org.genesys.taxonomy.checker.web.service.impl;
import java.io.IOException;
import java.io.Reader;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
......@@ -10,12 +11,9 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import com.opencsv.CSVReader;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
......@@ -29,6 +27,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.opencsv.CSVReader;
/**
* @author Andrey Lugovskoy.
*/
......@@ -52,6 +52,8 @@ public class LandOrSeaProcessServiceImpl implements ProcessService {
*/
public List<String[]> execute(Reader readerr, char separatorChar, char quoteChar, char escapeChar, char decimalMark) throws IOException {
final DecimalFormat decimalFormat = createDecimalFormat(decimalMark);
final List<String[]> lines = new LinkedList<>();
try (CSVReader reader = new CSVReader(readerr, separatorChar, quoteChar, escapeChar, 0, false)) {
......@@ -121,25 +123,27 @@ public class LandOrSeaProcessServiceImpl implements ProcessService {
if (inputs.size() == 0)
break;
Future<?> future = threadPool.submit(new Runnable() {
threadPool.submit(new Runnable() {
@Override
public void run() {
for (String[] nextLine : inputs) {
String[] outputLine = HeaderUtils.toOutputLine(nextLine, outputHeaders, outputMapping);
String declongitude = parseNumber(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LONGITUDE)], decimalMark);
String declatitude = parseNumber(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LATITUDE)], decimalMark);
String declongitude = StringUtils.trimToNull(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LONGITUDE)]);
String declatitude = StringUtils.trimToNull(nextLine[sourceMapping.get(ApplicationUtils.HEADER_LATITUDE)]);
String result = null;
if (StringUtils.isNotBlank(declatitude) && StringUtils.isNotBlank(declongitude)) {
try {
float longitude = Float.parseFloat(declongitude);
float latitude = Float.parseFloat(declatitude);
float longitude = decimalFormat.parse(declongitude).floatValue();
float latitude = decimalFormat.parse(declatitude).floatValue();
LOG.debug("Parsed DECLATITUDE/DECLONGITUDE ({}, {}) --> ({}, {})", declongitude, declatitude, longitude, latitude);
result = landOrSeaService.classifyLocation(longitude, latitude, ApplicationUtils.ALLOWED_DISTANCE_MARGIN);
} catch (Exception e) {
LOG.debug("Error parsing lat/lon", e);
if (e.getCause() != null)
result = "ERROR: " + e.getCause().getMessage();
else
......@@ -153,13 +157,7 @@ public class LandOrSeaProcessServiceImpl implements ProcessService {
}
}
});
try {
future.get();
} catch (Exception e) {
throw new IOException(e);
}
} while (true);
LOG.info("Shutting down");
......
......@@ -24,6 +24,8 @@ label.non.current.taxa=Flag non-current taxa
label.yes=Yes
label.no=No
label.decimal.mark=Decimal mark
label.decimal.mark.dot=Dot (\u03C0 is approximately 3.14159)
label.decimal.mark.comma=Comma (\u03C0 is approximately 3,14159)
http-error.401=Unauthorized
http-error.200= It's ok
......
......@@ -2,111 +2,120 @@
<html>
<head>
<title><spring:message code="label.title"/></title>
<style>
html {
font-family: Tahoma;
font-size: 12px;
}
<title><spring:message code="label.title"/></title>
<style>
html {
font-family: Tahoma;
font-size: 12px;
}
label {
display: block;
margin: 1em 0 0.5em;
font-weight: bold;
}
h2 {
font-size: 14px;
}
p.hint {
margin: 0.5em;
font-style: italic;
}
h3 {
margin: 1em 0 0.5em;
font-size: 12px;
}
form textarea {
width: 100%;
height: 10em;
}
p.hint {
margin: 0.5em;
font-style: italic;
}
.btn {
padding: 1em 2em;
margin: 2em 0;
}
form textarea {
width: 100%;
height: 10em;
}
</style>
.btn {
padding: 1em 2em;
margin: 2em 0;
}
</style>
</head>
<body>
<h1>
<h1>
How to use the MCPD taxonomy validation tool?
</h1>
<p>
See this short video to learn how to use the MCPD taxonomy validation tool: <a target="_blank"
href="https://www.youtube.com/watch?v=LR9Fl1P84Gc&index=6&list=PLDlzgGuc_qUrhzC0o4Mo5Esvn8vQ0R-D5">https://www.youtube.com/watch?v=LR9Fl1P84Gc&index=6&list=PLDlzgGuc_qUrhzC0o4Mo5Esvn8vQ0R-D5</a>
</p>
<div>
<label>Source taxonomy databases</label>
</h1>
<p>
See this short video to learn how to use the MCPD taxonomy validation tool:
<a target="_blank" href="https://www.youtube.com/watch?v=LR9Fl1P84Gc&index=6&list=PLDlzgGuc_qUrhzC0o4Mo5Esvn8vQ0R-D5">https://www.youtube.com/watch?v=LR9Fl1P84Gc&index=6&list=PLDlzgGuc_qUrhzC0o4Mo5Esvn8vQ0R-D5</a>
</p>
<div>
<h2>Source taxonomy databases</h2>
<p class="hint">The tool checks data against the GRIN-Global Taxonomy database (version 1.9.6.2) maintained by USDA-ARS. See
<a href="https://npgsweb.ars-grin.gov/gringlobal/taxon/abouttaxonomy.aspx" target="_blank">GRIN Taxonomy for Plants.</a>
<a href="https://npgsweb.ars-grin.gov/gringlobal/taxon/abouttaxonomy.aspx" target="_blank">GRIN Taxonomy for Plants.</a>
</p>
<label>Data source</label>
<h2>Data source</h2>
<p class="hint">This tool is able to parse CSV data.</p>
<label>Validated columns</label>
<p class="hint">Only the following MCPD columns will be checked: GENUS, SPECIES, SPAUTHOR, SUBTAXA, SUBTAUTHOR. Other columns will be included in the output, but not
processed.</p>
</div>
<h2>Validated columns</h2>
<p class="hint">Only the following MCPD columns will be checked: GENUS, SPECIES, SPAUTHOR, SUBTAXA, SUBTAUTHOR. Other columns will be included in the output, but not processed.</p>
</div>
<form id="process" action="process" name="process" method="POST" enctype="multipart/form-data">
<form id="process" action="process" name="process" method="POST" enctype="multipart/form-data">
<h1>Submit data for validation</h1>
<div>
<label><spring:message code="label.copy-paste-csv"/>:</label>
<p class="hint"><spring:message code="hint.copy-paste-csv"/></p>
<textarea name="csvText" id="csvText"></textarea>
<h3><spring:message code="label.copy-paste-csv"/>:</h3>
<p class="hint"><spring:message code="hint.copy-paste-csv"/></p>
<textarea name="csvText" id="csvText"></textarea>
</div>
<div>
<label for="file"><spring:message code="label.file"/>:</label>
<input id="file" type="file" name="csvFile" accept="text/*"/>
<p class="hint"><spring:message code="hint.upload-file"/></p>
<label for="file"><spring:message code="label.file"/>:</label>
<input id="file" type="file" name="csvFile" accept="text/*"/>
<p class="hint"><spring:message code="hint.upload-file"/></p>
</div>
<h2>CSV Configuration</h2>
<div>
<label for="separator"><spring:message code="label.csv-separator"/>:</label>
<input id="separator" name="separator" placeholder=","/>
<p class="hint"><spring:message code="hint.csv-separator"/></p>
<label for="separator"><spring:message code="label.csv-separator"/>:</label>
<input id="separator" name="separator" placeholder=","/>
<p class="hint"><spring:message code="hint.csv-separator"/></p>
</div>
<div>
<label for="quoteChar"><spring:message code="label.quote-char"/>:</label>
<input id="quoteChar" name="quoteChar" placeholder="&#34;"/>
<p class="hint"><spring:message code="hint.quote-char"/></p>
<label for="quoteChar"><spring:message code="label.quote-char"/>:</label>
<input id="quoteChar" name="quoteChar" placeholder="&#34;"/>
<p class="hint"><spring:message code="hint.quote-char"/></p>
</div>
<div>
<label for="escapeChar"><spring:message code="label.escape-char"/>:</label>
<input id="escapeChar" name="escapeChar" placeholder=""/>
<p class="hint"><spring:message code="hint.escape-char"/></p>
<label for="escapeChar"><spring:message code="label.escape-char"/>:</label>
<input id="escapeChar" name="escapeChar" placeholder=""/>
<p class="hint"><spring:message code="hint.escape-char"/></p>
</div>
<div>
<label><spring:message code="label.non.current.taxa"/>:</label>
<input type="radio" name="toCurrentTaxa" value="true"><spring:message code="label.yes"/>
<input type="radio" name="toCurrentTaxa" value="false" checked><spring:message code="label.no"/>
<h3><spring:message code="label.decimal.mark"/>:</h3>
<input type="radio" id="decimalMarkDot" name="decimalMark" value="." checked/>
<label for="decimalMarkDot"><spring:message code="label.decimal.mark.dot"/></label>
<input type="radio" id="decimalMarkComma" name="decimalMark" value=","/>
<label for="decimalMarkComma"><spring:message code="label.decimal.mark.comma"/></label>
</div>
<h2>Other options</h2>
<p class="hint">Options below are specific to validators, try them out.</p>
<div>
<label><spring:message code="label.decimal.mark"/>:</label>
<input type="radio" name="decimalMark" value=",">","
<input type="radio" name="decimalMark" value="." checked>"."</br>
<h3><spring:message code="label.non.current.taxa"/>:</h3>
<input type="radio" id="toCurrentTaxaYes" name="toCurrentTaxa" value="true"/>
<label for="toCurrentTaxaYes"><spring:message code="label.yes"/></label>
<input type="radio" id="toCurrentTaxaNo" name="toCurrentTaxa" value="false" checked/>
<label for="toCurrentTaxaNo"><spring:message code="label.no"/></label>
</div>
<div>
<button class="btn" type="submit" value="" name="validateType">
<spring:message code="label.run-check"/>
</button>
<button class="btn" type="submit" value="country" name="validateType">
<spring:message code="label.run-check-country"/>
</button>
<button class="btn" type="submit" value="landorsea" name="validateType">
<spring:message code="label.run-check-land"/>
</button>
<button class="btn" type="submit" value="" name="validateType">
<spring:message code="label.run-check"/>
</button>
<button class="btn" type="submit" value="country" name="validateType">
<spring:message code="label.run-check-country"/>
</button>
<button class="btn" type="submit" value="landorsea" name="validateType">
<spring:message code="label.run-check-land"/>
</button>
</div>
</form>
</form>
</body>
</html>
......@@ -41,7 +41,11 @@
.x-SPAUTHOR_check,
.x-SPECIES_check,
.x-SUBTAUTHOR_check,
.x-SUBTAXA_check {
.x-SUBTAXA_check,
.x-ORIGCTY_check,
.x-DECLATITUDE_parsed,
.x-DECLONGITUDE_parsed,
.x-LANDorSEA_check {
background-color: #ffffca;
}
thead .x-GENUS_check,
......@@ -50,7 +54,11 @@
thead .x-SPAUTHOR_check,
thead .x-SPECIES_check,
thead .x-SUBTAUTHOR_check,
thead .x-SUBTAXA_check {
thead .x-SUBTAXA_check,
thead .x-ORIGCTY_check,
thead .x-DECLATITUDE_parsed,
thead .x-DECLONGITUDE_parsed,
thead .x-LANDorSEA_check {
background-color: Yellow;
}
......@@ -58,6 +66,10 @@
</head>
<body>
<h1>How to interpret results?</h1>
<div>
<label>"_parsed" columns</label>
<p class="hint">For numeric values these columns contain the value as parsed by the program. See if setting a different decimal mark character helps.</p>
</div>
<div>
<label>"_check" columns</label>
<p class="hint">Columns highlighted Yellow were injected into your dataset at the most appropriate positions. They contain the results of the validation run.</p>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment