Commit b35b774a authored by Matija Obreza
Browse files

taxonomy-tools:1.3-SNAPSHOT with new CSV readers

parent bc104c20
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
<jdk.source>1.8</jdk.source> <jdk.source>1.8</jdk.source>
<commons-lang3.version>3.3.2</commons-lang3.version> <commons-lang3.version>3.3.2</commons-lang3.version>
<genesys-geotools.version>1.1-SNAPSHOT</genesys-geotools.version> <genesys-geotools.version>1.1-SNAPSHOT</genesys-geotools.version>
<taxonomy-tools.version>1.2-SNAPSHOT</taxonomy-tools.version> <taxonomy-tools.version>1.3-SNAPSHOT</taxonomy-tools.version>
</properties> </properties>
<licenses> <licenses>
......
...@@ -19,7 +19,6 @@ import java.io.File; ...@@ -19,7 +19,6 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.text.ParseException; import java.text.ParseException;
...@@ -27,6 +26,7 @@ import java.util.ArrayList; ...@@ -27,6 +26,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -38,7 +38,7 @@ import org.genesys.taxonomy.checker.TaxonomyChecker; ...@@ -38,7 +38,7 @@ import org.genesys.taxonomy.checker.TaxonomyChecker;
import org.genesys.taxonomy.checker.TaxonomyException; import org.genesys.taxonomy.checker.TaxonomyException;
import org.genesys.taxonomy.checker.web.service.ProcessService; import org.genesys.taxonomy.checker.web.service.ProcessService;
import org.genesys.taxonomy.checker.web.util.ApplicationUtils; import org.genesys.taxonomy.checker.web.util.ApplicationUtils;
import org.genesys.taxonomy.gringlobal.component.TaxonomyReader; import org.genesys.taxonomy.gringlobal.component.CabReader;
import org.genesys.taxonomy.gringlobal.model.GenusRow; import org.genesys.taxonomy.gringlobal.model.GenusRow;
import org.genesys.taxonomy.gringlobal.model.SpeciesRow; import org.genesys.taxonomy.gringlobal.model.SpeciesRow;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -99,18 +99,20 @@ public class TaxonomyProcessServiceImpl implements ProcessService { ...@@ -99,18 +99,20 @@ public class TaxonomyProcessServiceImpl implements ProcessService {
LOG.info("Loading taxonomy_genus.txt"); LOG.info("Loading taxonomy_genus.txt");
// read taxonomy_genus.txt // read taxonomy_genus.txt
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(new File(rootDir, "taxonomy_genus.txt")), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(rootDir, "taxonomy_genus.txt")), 0)) {
Iterator<GenusRow> beanReader = CabReader.beanReader(GenusRow.class, reader).iterator();
GenusRow genusRow = null; GenusRow genusRow = null;
while ((genusRow = TaxonomyReader.toGenus(reader.readNext())) != null) { while (beanReader.hasNext() && (genusRow = beanReader.next()) != null) {
taxonomyDatabase.registerGenus(genusRow.getGenusId(), genusRow.getGenusName()); taxonomyDatabase.registerGenus(genusRow.getGenusId(), genusRow.getGenusName());
} }
} }
LOG.info("Loading taxonomy_species.txt"); LOG.info("Loading taxonomy_species.txt");
// read taxonomy_species.txt // read taxonomy_species.txt
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(new File(rootDir, "taxonomy_species.txt")), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(rootDir, "taxonomy_species.txt")), 0)) {
Iterator<SpeciesRow> beanReader = CabReader.beanReader(SpeciesRow.class, reader).iterator();
SpeciesRow speciesRow = null; SpeciesRow speciesRow = null;
while ((speciesRow = TaxonomyReader.toSpecies(reader.readNext())) != null) { while (beanReader.hasNext() && (speciesRow = beanReader.next()) != null) {
taxonomyDatabase.registerSpecies(speciesRow); taxonomyDatabase.registerSpecies(speciesRow);
} }
} }
...@@ -331,7 +333,7 @@ public class TaxonomyProcessServiceImpl implements ProcessService { ...@@ -331,7 +333,7 @@ public class TaxonomyProcessServiceImpl implements ProcessService {
outputLine[outputHeaders.indexOf(ApplicationUtils.HEADER_GRINTAX_SPECIESCURRENT)] = Boolean.toString(speciesRow.isCurrent()); outputLine[outputHeaders.indexOf(ApplicationUtils.HEADER_GRINTAX_SPECIESCURRENT)] = Boolean.toString(speciesRow.isCurrent());
if (!speciesRow.isCurrent() && toCurrentTaxa) { if (!speciesRow.isCurrent() && toCurrentTaxa) {
LOG.debug("{} is not current", speciesRow); LOG.debug("{} is not current", speciesRow);
final SpeciesRow currentSpecies = taxonomyDatabase.getSpeciesRow(speciesRow.getCurrentSpeciesId()); final SpeciesRow currentSpecies = taxonomyDatabase.getSpeciesRow(speciesRow.getCurrentTaxonomySpeciesId());
final String currentGenus = taxonomyDatabase.getGenus(currentSpecies.getGenusId()); final String currentGenus = taxonomyDatabase.getGenus(currentSpecies.getGenusId());
// LOG.warn("Result: {}", Arrays.toString(outputLine)); // LOG.warn("Result: {}", Arrays.toString(outputLine));
updateOutputColumn(outputLine, outputHeaders.indexOf(ApplicationUtils.HEADER_GENUS_CHECK), currentGenus, genus, ApplicationUtils.CURRENT_SUFFIX); updateOutputColumn(outputLine, outputHeaders.indexOf(ApplicationUtils.HEADER_GENUS_CHECK), currentGenus, genus, ApplicationUtils.CURRENT_SUFFIX);
......
...@@ -3,11 +3,11 @@ package org.genesys.grin; ...@@ -3,11 +3,11 @@ package org.genesys.grin;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException; import java.text.ParseException;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
...@@ -15,7 +15,7 @@ import org.apache.commons.lang3.StringEscapeUtils; ...@@ -15,7 +15,7 @@ import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.genesys.taxonomy.checker.web.config.ApplicationConfig; import org.genesys.taxonomy.checker.web.config.ApplicationConfig;
import org.genesys.taxonomy.download.TaxonomyDownloader; import org.genesys.taxonomy.download.TaxonomyDownloader;
import org.genesys.taxonomy.gringlobal.component.TaxonomyReader; import org.genesys.taxonomy.gringlobal.component.CabReader;
import org.genesys.taxonomy.gringlobal.model.AuthorRow; import org.genesys.taxonomy.gringlobal.model.AuthorRow;
import org.genesys.taxonomy.gringlobal.model.GenusRow; import org.genesys.taxonomy.gringlobal.model.GenusRow;
import org.genesys.taxonomy.gringlobal.model.SpeciesRow; import org.genesys.taxonomy.gringlobal.model.SpeciesRow;
...@@ -58,7 +58,7 @@ public class WhatsWrong { ...@@ -58,7 +58,7 @@ public class WhatsWrong {
TaxonomyDownloader.unpackCabinetFile(downloadedCabFile, dataFolder, false); TaxonomyDownloader.unpackCabinetFile(downloadedCabFile, dataFolder, false);
} }
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 1)) {
String[] row = null; String[] row = null;
while ((row = reader.readNext()) != null) { while ((row = reader.readNext()) != null) {
String name = row[1]; String name = row[1];
...@@ -99,13 +99,14 @@ public class WhatsWrong { ...@@ -99,13 +99,14 @@ public class WhatsWrong {
Map<String, AuthorRow> authors = new HashMap<>(); Map<String, AuthorRow> authors = new HashMap<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 0)) {
String[] row = null; Iterator<AuthorRow> beanReader = CabReader.beanReader(AuthorRow.class, reader).iterator();
while ((row = reader.readNext()) != null) { AuthorRow authorRow = null;
AuthorRow authorRow = TaxonomyReader.toAuthor(row); while (beanReader.hasNext() && (authorRow = beanReader.next()) != null) {
authors.put(authorRow.getShortName(), authorRow); authors.put(authorRow.getShortName(), authorRow);
if (! authorRow.getShortName().equals(authorRow.getShortNameHtml())) { if (! authorRow.getShortName().equals(authorRow.getShortName())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortNameHtml()); String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortName());
if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) { if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) {
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId); // LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
} }
...@@ -116,7 +117,7 @@ public class WhatsWrong { ...@@ -116,7 +117,7 @@ public class WhatsWrong {
Set<String> authorCache = new HashSet<>(); Set<String> authorCache = new HashSet<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(familyFile), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(familyFile), 1)) {
String[] row = null; String[] row = null;
while ((row = reader.readNext()) != null) { while ((row = reader.readNext()) != null) {
String authorName = row[6]; String authorName = row[6];
...@@ -155,13 +156,13 @@ public class WhatsWrong { ...@@ -155,13 +156,13 @@ public class WhatsWrong {
Map<String, AuthorRow> authors = new HashMap<>(); Map<String, AuthorRow> authors = new HashMap<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 0)) {
String[] row = null; Iterator<AuthorRow> beanReader = CabReader.beanReader(AuthorRow.class, reader).iterator();
while ((row = reader.readNext()) != null) { AuthorRow authorRow = null;
AuthorRow authorRow = TaxonomyReader.toAuthor(row); while (beanReader.hasNext() && (authorRow = beanReader.next()) != null) {
authors.put(authorRow.getShortName(), authorRow); authors.put(authorRow.getShortName(), authorRow);
if (! authorRow.getShortName().equals(authorRow.getShortNameHtml())) { if (! authorRow.getShortName().equals(authorRow.getShortName())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortNameHtml()); String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortName());
if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) { if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) {
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId); // LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
} }
...@@ -172,10 +173,10 @@ public class WhatsWrong { ...@@ -172,10 +173,10 @@ public class WhatsWrong {
Set<String> authorCache = new HashSet<>(); Set<String> authorCache = new HashSet<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(genusFile), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(genusFile), 0)) {
String[] row = null; Iterator<GenusRow> beanReader = CabReader.beanReader(GenusRow.class, reader).iterator();
while ((row = reader.readNext()) != null) { GenusRow genusRow=null;
GenusRow genusRow=TaxonomyReader.toGenus(row); while (beanReader.hasNext() && (genusRow = beanReader.next()) != null) {
if (authorCache.contains(genusRow.getGenusAuthority())) { if (authorCache.contains(genusRow.getGenusAuthority())) {
continue; continue;
...@@ -186,9 +187,9 @@ public class WhatsWrong { ...@@ -186,9 +187,9 @@ public class WhatsWrong {
for (String name : authorNames) { for (String name : authorNames) {
AuthorRow authorRow = authors.get(name); AuthorRow authorRow = authors.get(name);
if (authorRow == null) { if (authorRow == null) {
LOG.warn("No author name\t{}\tin authority\t{}\tfor genus\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", name, genusRow.getGenusAuthority(), genusRow.getGenusName(), genusRow.getGenusId(), genusRow.isCurrent()); LOG.warn("No author name\t{}\tin authority\t{}\tfor genus\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", name, genusRow.getGenusAuthority(), genusRow.getGenusName(), genusRow.getGenusId(), genusRow.getCurrentTaxonomyGenusId());
} else if (! name.equals(authorRow.getShortName())) { } else if (! name.equals(authorRow.getShortName())) {
LOG.warn("Genus\t{}\tauthority\t{}={}\tshould be\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", genusRow.getGenusName(), genusRow.getGenusAuthority(), authorRow.getShortName(), authorRow.getShortNameHtml(), genusRow.getGenusId(), genusRow.isCurrent()); LOG.warn("Genus\t{}\tauthority\t{}={}\tshould be\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", genusRow.getGenusName(), genusRow.getGenusAuthority(), authorRow.getShortName(), authorRow.getShortName(), genusRow.getGenusId(), genusRow.getCurrentTaxonomyGenusId());
} }
} }
authorCache.add(genusRow.getGenusAuthority()); authorCache.add(genusRow.getGenusAuthority());
...@@ -220,13 +221,13 @@ public class WhatsWrong { ...@@ -220,13 +221,13 @@ public class WhatsWrong {
Map<String, AuthorRow> authors = new HashMap<>(); Map<String, AuthorRow> authors = new HashMap<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 0)) {
String[] row = null; Iterator<AuthorRow> beanReader = CabReader.beanReader(AuthorRow.class, reader).iterator();
while ((row = reader.readNext()) != null) { AuthorRow authorRow = null;
AuthorRow authorRow = TaxonomyReader.toAuthor(row); while (beanReader.hasNext() && (authorRow = beanReader.next()) != null) {
authors.put(authorRow.getShortName(), authorRow); authors.put(authorRow.getShortName(), authorRow);
if (! authorRow.getShortName().equals(authorRow.getShortNameHtml())) { if (! authorRow.getShortName().equals(authorRow.getShortName())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortNameHtml()); String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortName());
if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) { if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) {
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId); // LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
} }
...@@ -235,24 +236,16 @@ public class WhatsWrong { ...@@ -235,24 +236,16 @@ public class WhatsWrong {
} }
} }
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(speciesFile), "UTF-8"), 1)) { try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(speciesFile), 0)) {
String[] row = null; Iterator<SpeciesRow> beanReader = CabReader.beanReader(SpeciesRow.class, reader).iterator();
while ((row = reader.readNext()) != null) { SpeciesRow speciesRow=null;
try { while (beanReader.hasNext() && (speciesRow = beanReader.next()) != null) {
SpeciesRow speciesRow=TaxonomyReader.toSpecies(row); checkAuthority(authors, "SPECIES_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSpeciesAuthority());
checkAuthority(authors, "SPECIES_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSpeciesAuthority()); checkAuthority(authors, "SUBSPECIES_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSubspeciesAuthority());
checkAuthority(authors, "SUBSPECIES_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSubspeciesAuthority()); checkAuthority(authors, "VARIETY_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getVarietyAuthority());
checkAuthority(authors, "VARIETY_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getVarietyAuthority()); checkAuthority(authors, "SUBVARIETY_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSubvarietyAuthority());
checkAuthority(authors, "SUBVARIETY_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSubvarietyAuthority()); checkAuthority(authors, "FORMA_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getFormaAuthority());
checkAuthority(authors, "FORMA_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getFormaAuthority()); checkAuthority(authors, "NAME_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getNameAuthority());
checkAuthority(authors, "NAME_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getNameAuthority());
} catch (ParseException e) {
String x="\n";
for (String y : row) {
x+=y + "\n";
}
LOG.warn("{} in row:\n{}", e.getMessage(), x, e);
}
} }
} }
} }
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment