Commit e4fdfe33 authored by Matija Obreza
Browse files

Added GRINTAX_speciesId and GRINTAX_speciesCurrent columns in output CSV,...

Added GRINTAX_speciesId and GRINTAX_speciesCurrent columns in output CSV, added current species data for all suggestions
parent f70c06a2
......@@ -55,7 +55,10 @@ import com.opencsv.CSVWriter;
*/
public class GGTC {
private static final String SUGGESTION_BLANK = "_blank_";
private static final String CHECK_PASSED = "OK";
private static final String CURRENT_SUFFIX = " [current]";
private static final String HEADER_GENUS = "GENUS";
private static final String HEADER_GENUS_CHECK = "GENUS_check";
......@@ -71,6 +74,9 @@ public class GGTC {
private static final String HEADER_SUBTAUTHOR = "SUBTAUTHOR";
private static final String HEADER_SUBTAUTHOR_CHECK = "SUBTAUTHOR_check";
private static final String HEADER_GRINTAX_SPECIESID = "GRINTAX_speciesId";
private static final String HEADER_GRINTAX_SPECIESCURRENT = "GRINTAX_speciesCurrent";
/** The Constant LOG. */
private final static Logger LOG = LoggerFactory.getLogger(GGTC.class);
......@@ -100,11 +106,11 @@ public class GGTC {
// Increase logging level
org.apache.log4j.Logger logger = LogManager.getLogger("org.genesys2");
logger.setLevel(nextLevel(logger.getLevel()));
LOG.warn("Increasing loglevel to {}", logger.getLevel());
LOG.info("Increasing loglevel to {}", logger.getLevel());
} else if ("-csv".equals(arg)) {
char[] csv = args[++i].toCharArray();
LOG.warn("Setting CSV chars to {}", csv);
LOG.info("Setting CSV chars to {}", csv);
for (int j = 0; j < csv.length; j++) {
if (j == 0)
ggtc.separatorChar = csv[0];
......@@ -227,7 +233,7 @@ public class GGTC {
// Check for _check headers and exit if necessary
boolean err = false;
for (String header : new String[] { HEADER_GENUS_CHECK, HEADER_SPECIES_CHECK, HEADER_SPAUTHOR_CHECK, HEADER_SUBTAXA_CHECK, HEADER_SUBTAUTHOR_CHECK }) {
LOG.info("Looking for header {} in {}", header, Arrays.asList(headers));
LOG.info("Must not find header {} in {}", header, Arrays.asList(headers));
int pos = ArrayUtils.indexOf(headers, header);
if (pos >= 0) {
LOG.error("Header {} found in input CSV.", header);
......@@ -253,13 +259,13 @@ public class GGTC {
}
}
LOG.debug("Source mapping indexes: {}", outputMapping);
LOG.info("1 Output headers: {}", outputHeaders);
LOG.debug("1 Output headers: {}", outputHeaders);
Arrays.stream(checkerHeaders).forEach(header -> {
LOG.info("Looking for header {} in {}", header, Arrays.asList(headers));
LOG.debug("Looking for header {} in {}", header, Arrays.asList(headers));
int pos = ArrayUtils.indexOf(headers, header);
if (pos >= 0) {
LOG.info("Header {} is in position {}", header, pos);
LOG.debug("Header {} is in position {}", header, pos);
sourceMapping.put(header, pos);
} else {
LOG.info("No header {}", header);
......@@ -274,20 +280,30 @@ public class GGTC {
}
});
// Insert extras
if (outputHeaders.contains(HEADER_GENUS) && outputHeaders.contains(HEADER_SPECIES)) {
if (!outputHeaders.contains(HEADER_SPAUTHOR_CHECK)) {
LOG.debug("Adding {} because it's missing", HEADER_SPAUTHOR_CHECK);
outputHeaders.add(outputHeaders.size(), HEADER_SPAUTHOR_CHECK);
outputHeaders.add(outputHeaders.indexOf(HEADER_SPECIES) + 1, HEADER_SPAUTHOR_CHECK);
}
if (outputHeaders.contains(HEADER_SUBTAXA) && !outputHeaders.contains(HEADER_SUBTAUTHOR_CHECK)) {
LOG.debug("Adding {} because it's missing", HEADER_SUBTAUTHOR_CHECK);
outputHeaders.add(outputHeaders.size(), HEADER_SUBTAUTHOR_CHECK);
if (outputHeaders.contains(HEADER_SUBTAXA)) {
if (!outputHeaders.contains(HEADER_SUBTAUTHOR_CHECK)) {
LOG.debug("Adding {} because it's missing", HEADER_SUBTAUTHOR_CHECK);
outputHeaders.add(outputHeaders.size(), HEADER_SUBTAUTHOR_CHECK);
}
}
LOG.debug("Adding {}", HEADER_GRINTAX_SPECIESCURRENT);
outputHeaders.add(outputHeaders.indexOf(HEADER_SPECIES) + 1, HEADER_GRINTAX_SPECIESCURRENT);
outputMapping.add(outputHeaders.indexOf(HEADER_SPECIES) + 1, null);
LOG.debug("Adding {}", HEADER_GRINTAX_SPECIESID);
outputHeaders.add(outputHeaders.indexOf(HEADER_SPECIES) + 1, HEADER_GRINTAX_SPECIESID);
outputMapping.add(outputHeaders.indexOf(HEADER_SPECIES) + 1, null);
}
LOG.info("Output headers: {}", outputHeaders);
LOG.debug("Source mapping indexes: {}", outputMapping);
LOG.info("Source mapping indexes: {}", outputMapping);
try (CSVWriter writer = new CSVWriter(new BufferedWriter(new OutputStreamWriter(outputStream)), ',', '"', '\\', "\r\n")) {
......@@ -309,9 +325,13 @@ public class GGTC {
outputLine[outputCol] = sourceLine[mapped];
}
String genus = sourceMapping.get(HEADER_GENUS) == null ? null : sourceLine[sourceMapping.get(HEADER_GENUS)];
String species = sourceMapping.get(HEADER_SPECIES) == null ? null : sourceLine[sourceMapping.get(HEADER_SPECIES)];
String subtaxa = sourceMapping.get(HEADER_SUBTAXA) == null ? null : sourceLine[sourceMapping.get(HEADER_SUBTAXA)];
String spAuthor = sourceMapping.get(HEADER_SPAUTHOR) == null ? null : sourceLine[sourceMapping.get(HEADER_SPAUTHOR)];
String subtAuthor = sourceMapping.get(HEADER_SUBTAUTHOR) == null ? null : sourceLine[sourceMapping.get(HEADER_SUBTAUTHOR)];
if (outputHeaders.indexOf(HEADER_GENUS_CHECK) >= 0) {
String genus = sourceLine[sourceMapping.get(HEADER_GENUS)];
String genus_check = taxonomyChecker.suggestGenus(genus, 5).stream().reduce(null, (prev, suggestion) -> prev == null ? suggestion : prev + ";" + suggestion);
if (LOG.isTraceEnabled())
......@@ -321,9 +341,6 @@ public class GGTC {
}
if (outputHeaders.indexOf(HEADER_SPECIES_CHECK) >= 0) {
String genus = sourceLine[sourceMapping.get(HEADER_GENUS)];
String species = sourceLine[sourceMapping.get(HEADER_SPECIES)];
String species_check = taxonomyChecker.suggestSpecies(genus, species, 5).stream().reduce(null, (prev, suggestion) -> prev == null ? suggestion : prev + ";" + suggestion);
if (LOG.isTraceEnabled())
......@@ -333,9 +350,6 @@ public class GGTC {
}
if (outputHeaders.indexOf(HEADER_SPAUTHOR_CHECK) >= 0) {
String genus = sourceLine[sourceMapping.get(HEADER_GENUS)];
String species = sourceLine[sourceMapping.get(HEADER_SPECIES)];
String spauthor_check = taxonomyChecker.getSpeciesAuthority(genus, species);
if (LOG.isTraceEnabled())
......@@ -343,7 +357,6 @@ public class GGTC {
// Add if needed
if (sourceMapping.get(HEADER_SPAUTHOR) != null) {
String spAuthor = sourceLine[sourceMapping.get(HEADER_SPAUTHOR)];
// System.err.println(spAuthor + " " + spauthor_check);
outputLine[outputHeaders.indexOf(HEADER_SPAUTHOR_CHECK)] = StringUtils.equals(spAuthor, spauthor_check) ? CHECK_PASSED : spauthor_check;
} else {
......@@ -354,10 +367,6 @@ public class GGTC {
}
if (outputHeaders.indexOf(HEADER_SUBTAXA_CHECK) >= 0) {
String genus = sourceLine[sourceMapping.get(HEADER_GENUS)];
String species = sourceLine[sourceMapping.get(HEADER_SPECIES)];
String subtaxa = sourceLine[sourceMapping.get(HEADER_SUBTAXA)];
String subtaxa_check = taxonomyChecker.suggestSubtaxa(genus, species, subtaxa, 5).stream().reduce(null,
(prev, suggestion) -> prev == null ? suggestion : prev + ";" + suggestion);
......@@ -368,10 +377,6 @@ public class GGTC {
}
if (outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK) >= 0) {
String genus = sourceLine[sourceMapping.get(HEADER_GENUS)];
String species = sourceLine[sourceMapping.get(HEADER_SPECIES)];
String subtaxa = sourceLine[sourceMapping.get(HEADER_SUBTAXA)];
String subtauthor_check = taxonomyChecker.getSubtaxaAuthority(genus, species, subtaxa);
if (LOG.isTraceEnabled())
......@@ -379,14 +384,38 @@ public class GGTC {
// Add if needed
if (sourceMapping.get(HEADER_SUBTAUTHOR) != null) {
String subtauthor = sourceLine[sourceMapping.get(HEADER_SUBTAUTHOR)];
outputLine[outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK)] = StringUtils.equals(subtauthor, subtauthor_check) ? CHECK_PASSED : subtauthor_check;
outputLine[outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK)] = StringUtils.equals(subtAuthor, subtauthor_check) ? CHECK_PASSED : subtauthor_check;
} else {
outputLine[outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK)] = subtauthor_check;
}
}
LOG.info("Source and result:\n\n\t{}\n\t{}\n", Arrays.toString(sourceLine), Arrays.toString(outputLine));
if (outputHeaders.indexOf(HEADER_GRINTAX_SPECIESID) >= 0) {
List<SpeciesRow> speciesRows = taxonomyDatabase.findSpeciesRow(genus, species, StringUtils.defaultIfBlank(subtaxa, ""));
if (speciesRows.size() == 1) {
SpeciesRow speciesRow = speciesRows.get(0);
outputLine[outputHeaders.indexOf(HEADER_GRINTAX_SPECIESID)] = speciesRow.getSpeciesId().toString();
outputLine[outputHeaders.indexOf(HEADER_GRINTAX_SPECIESCURRENT)] = Boolean.toString(speciesRow.isCurrent());
if (!speciesRow.isCurrent()) {
LOG.debug("{} is not current", speciesRow);
SpeciesRow currentSpecies = taxonomyDatabase.getSpeciesRow(speciesRow.getCurrentSpeciesId());
String currentGenus = taxonomyDatabase.getGenus(currentSpecies.getGenusId());
// LOG.warn("Result: {}", Arrays.toString(outputLine));
updateOutputColumn(outputLine, outputHeaders.indexOf(HEADER_GENUS_CHECK), currentGenus, genus, CURRENT_SUFFIX);
updateOutputColumn(outputLine, outputHeaders.indexOf(HEADER_SPECIES_CHECK), currentSpecies.getSpeciesName(), species, CURRENT_SUFFIX);
updateOutputColumn(outputLine, outputHeaders.indexOf(HEADER_SPAUTHOR_CHECK), StringUtils.defaultIfBlank(currentSpecies.getSpeciesAuthority(), SUGGESTION_BLANK),
spAuthor, CURRENT_SUFFIX);
updateOutputColumn(outputLine, outputHeaders.indexOf(HEADER_SUBTAXA_CHECK), StringUtils.defaultIfBlank(currentSpecies.getSubtaxa(), SUGGESTION_BLANK), subtaxa,
CURRENT_SUFFIX);
updateOutputColumn(outputLine, outputHeaders.indexOf(HEADER_SUBTAUTHOR_CHECK), StringUtils.defaultIfBlank(currentSpecies.getSubtaxaAuthority(), SUGGESTION_BLANK),
subtAuthor, CURRENT_SUFFIX);
}
} else {
LOG.debug("Multiple speciesRows match genus={} species={} subtaxa={}", genus, species, subtaxa);
}
}
LOG.trace("Source and result:\n\n\t{}\n\t{}\n", Arrays.toString(sourceLine), Arrays.toString(outputLine));
writer.writeNext(outputLine);
}
......@@ -395,4 +424,27 @@ public class GGTC {
}
}
/**
 * Merge a suggested value into one cell of the output row.
 * <p>
 * Nothing is written when the column is absent ({@code index < 0}), when there is no
 * {@code newValue}, or when {@code ignoreIfMatches} is non-null and equals either the
 * cell's existing content or {@code newValue} itself.
 * <p>
 * Otherwise {@code newValue + suffix} replaces the cell when {@code ignoreIfMatches} is
 * {@code null}, when the cell is still unset (null or empty), or when the cell holds
 * {@link #CHECK_PASSED}. In every other case it is appended to the existing content,
 * separated by {@code ";"}.
 *
 * @param outputLine the output row being assembled; modified in place
 * @param index column index within {@code outputLine}, negative when the column is not in the output
 * @param newValue new value to write; {@code null} leaves the cell untouched
 * @param ignoreIfMatches value that makes the update unnecessary when it equals the existing cell
 *        content or {@code newValue}; may be {@code null}
 * @param suffix suffix to append if writing the value
 */
private void updateOutputColumn(String[] outputLine, int index, String newValue, String ignoreIfMatches, String suffix) {
    // Column is not part of the output, or there is nothing to suggest
    if (index < 0 || newValue == null) {
        return;
    }

    final String existing = outputLine[index];

    // The reference value already agrees with the cell or with the suggestion: keep the cell as-is
    if (ignoreIfMatches != null && (ignoreIfMatches.equals(existing) || ignoreIfMatches.equals(newValue))) {
        return;
    }

    final String suggestion = newValue + suffix;
    if (ignoreIfMatches == null || existing == null || existing.isEmpty() || CHECK_PASSED.equals(existing)) {
        // An unset cell must be overwritten: string concatenation onto null would
        // otherwise emit the literal text "null;" in front of the suggestion.
        outputLine[index] = suggestion;
    } else {
        outputLine[index] = existing + ";" + suggestion;
    }
}
}
......@@ -26,3 +26,4 @@ log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %t %5p %c{1}:%L - %m
log4j.rootLogger=error, stdout
log4j.category.org.genesys2=warn
log4j.category.org.genesys2.gringlobal.taxonomy.cli=info
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment