Commit b35b774a authored by Matija Obreza
Browse files

taxonomy-tools:1.3-SNAPSHOT with new CSV readers

parent bc104c20
......@@ -18,7 +18,7 @@
<jdk.source>1.8</jdk.source>
<commons-lang3.version>3.3.2</commons-lang3.version>
<genesys-geotools.version>1.1-SNAPSHOT</genesys-geotools.version>
<taxonomy-tools.version>1.2-SNAPSHOT</taxonomy-tools.version>
<taxonomy-tools.version>1.3-SNAPSHOT</taxonomy-tools.version>
</properties>
<licenses>
......
......@@ -19,7 +19,6 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
......@@ -27,6 +26,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
......@@ -38,7 +38,7 @@ import org.genesys.taxonomy.checker.TaxonomyChecker;
import org.genesys.taxonomy.checker.TaxonomyException;
import org.genesys.taxonomy.checker.web.service.ProcessService;
import org.genesys.taxonomy.checker.web.util.ApplicationUtils;
import org.genesys.taxonomy.gringlobal.component.TaxonomyReader;
import org.genesys.taxonomy.gringlobal.component.CabReader;
import org.genesys.taxonomy.gringlobal.model.GenusRow;
import org.genesys.taxonomy.gringlobal.model.SpeciesRow;
import org.slf4j.Logger;
......@@ -99,18 +99,20 @@ public class TaxonomyProcessServiceImpl implements ProcessService {
LOG.info("Loading taxonomy_genus.txt");
// read taxonomy_genus.txt
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(new File(rootDir, "taxonomy_genus.txt")), "UTF-8"), 1)) {
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(rootDir, "taxonomy_genus.txt")), 0)) {
Iterator<GenusRow> beanReader = CabReader.beanReader(GenusRow.class, reader).iterator();
GenusRow genusRow = null;
while ((genusRow = TaxonomyReader.toGenus(reader.readNext())) != null) {
while (beanReader.hasNext() && (genusRow = beanReader.next()) != null) {
taxonomyDatabase.registerGenus(genusRow.getGenusId(), genusRow.getGenusName());
}
}
LOG.info("Loading taxonomy_species.txt");
// read taxonomy_species.txt
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(new File(rootDir, "taxonomy_species.txt")), "UTF-8"), 1)) {
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(rootDir, "taxonomy_species.txt")), 0)) {
Iterator<SpeciesRow> beanReader = CabReader.beanReader(SpeciesRow.class, reader).iterator();
SpeciesRow speciesRow = null;
while ((speciesRow = TaxonomyReader.toSpecies(reader.readNext())) != null) {
while (beanReader.hasNext() && (speciesRow = beanReader.next()) != null) {
taxonomyDatabase.registerSpecies(speciesRow);
}
}
......@@ -331,7 +333,7 @@ public class TaxonomyProcessServiceImpl implements ProcessService {
outputLine[outputHeaders.indexOf(ApplicationUtils.HEADER_GRINTAX_SPECIESCURRENT)] = Boolean.toString(speciesRow.isCurrent());
if (!speciesRow.isCurrent() && toCurrentTaxa) {
LOG.debug("{} is not current", speciesRow);
final SpeciesRow currentSpecies = taxonomyDatabase.getSpeciesRow(speciesRow.getCurrentSpeciesId());
final SpeciesRow currentSpecies = taxonomyDatabase.getSpeciesRow(speciesRow.getCurrentTaxonomySpeciesId());
final String currentGenus = taxonomyDatabase.getGenus(currentSpecies.getGenusId());
// LOG.warn("Result: {}", Arrays.toString(outputLine));
updateOutputColumn(outputLine, outputHeaders.indexOf(ApplicationUtils.HEADER_GENUS_CHECK), currentGenus, genus, ApplicationUtils.CURRENT_SUFFIX);
......
......@@ -3,11 +3,11 @@ package org.genesys.grin;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
......@@ -15,7 +15,7 @@ import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.taxonomy.checker.web.config.ApplicationConfig;
import org.genesys.taxonomy.download.TaxonomyDownloader;
import org.genesys.taxonomy.gringlobal.component.TaxonomyReader;
import org.genesys.taxonomy.gringlobal.component.CabReader;
import org.genesys.taxonomy.gringlobal.model.AuthorRow;
import org.genesys.taxonomy.gringlobal.model.GenusRow;
import org.genesys.taxonomy.gringlobal.model.SpeciesRow;
......@@ -58,7 +58,7 @@ public class WhatsWrong {
TaxonomyDownloader.unpackCabinetFile(downloadedCabFile, dataFolder, false);
}
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) {
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
String name = row[1];
......@@ -99,13 +99,14 @@ public class WhatsWrong {
Map<String, AuthorRow> authors = new HashMap<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
AuthorRow authorRow = TaxonomyReader.toAuthor(row);
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 0)) {
Iterator<AuthorRow> beanReader = CabReader.beanReader(AuthorRow.class, reader).iterator();
AuthorRow authorRow = null;
while (beanReader.hasNext() && (authorRow = beanReader.next()) != null) {
authors.put(authorRow.getShortName(), authorRow);
if (! authorRow.getShortName().equals(authorRow.getShortNameHtml())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortNameHtml());
if (! authorRow.getShortName().equals(authorRow.getShortName())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortName());
if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) {
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
}
......@@ -116,7 +117,7 @@ public class WhatsWrong {
Set<String> authorCache = new HashSet<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(familyFile), "UTF-8"), 1)) {
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(familyFile), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
String authorName = row[6];
......@@ -155,13 +156,13 @@ public class WhatsWrong {
Map<String, AuthorRow> authors = new HashMap<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
AuthorRow authorRow = TaxonomyReader.toAuthor(row);
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 0)) {
Iterator<AuthorRow> beanReader = CabReader.beanReader(AuthorRow.class, reader).iterator();
AuthorRow authorRow = null;
while (beanReader.hasNext() && (authorRow = beanReader.next()) != null) {
authors.put(authorRow.getShortName(), authorRow);
if (! authorRow.getShortName().equals(authorRow.getShortNameHtml())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortNameHtml());
if (! authorRow.getShortName().equals(authorRow.getShortName())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortName());
if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) {
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
}
......@@ -172,10 +173,10 @@ public class WhatsWrong {
Set<String> authorCache = new HashSet<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(genusFile), "UTF-8"), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
GenusRow genusRow=TaxonomyReader.toGenus(row);
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(genusFile), 0)) {
Iterator<GenusRow> beanReader = CabReader.beanReader(GenusRow.class, reader).iterator();
GenusRow genusRow=null;
while (beanReader.hasNext() && (genusRow = beanReader.next()) != null) {
if (authorCache.contains(genusRow.getGenusAuthority())) {
continue;
......@@ -186,9 +187,9 @@ public class WhatsWrong {
for (String name : authorNames) {
AuthorRow authorRow = authors.get(name);
if (authorRow == null) {
LOG.warn("No author name\t{}\tin authority\t{}\tfor genus\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", name, genusRow.getGenusAuthority(), genusRow.getGenusName(), genusRow.getGenusId(), genusRow.isCurrent());
LOG.warn("No author name\t{}\tin authority\t{}\tfor genus\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", name, genusRow.getGenusAuthority(), genusRow.getGenusName(), genusRow.getGenusId(), genusRow.getCurrentTaxonomyGenusId());
} else if (! name.equals(authorRow.getShortName())) {
LOG.warn("Genus\t{}\tauthority\t{}={}\tshould be\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", genusRow.getGenusName(), genusRow.getGenusAuthority(), authorRow.getShortName(), authorRow.getShortNameHtml(), genusRow.getGenusId(), genusRow.isCurrent());
LOG.warn("Genus\t{}\tauthority\t{}={}\tshould be\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}", genusRow.getGenusName(), genusRow.getGenusAuthority(), authorRow.getShortName(), authorRow.getShortName(), genusRow.getGenusId(), genusRow.getCurrentTaxonomyGenusId());
}
}
authorCache.add(genusRow.getGenusAuthority());
......@@ -220,13 +221,13 @@ public class WhatsWrong {
Map<String, AuthorRow> authors = new HashMap<>();
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(taxonomyAuthor), "UTF-8"), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
AuthorRow authorRow = TaxonomyReader.toAuthor(row);
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(taxonomyAuthor), 0)) {
Iterator<AuthorRow> beanReader = CabReader.beanReader(AuthorRow.class, reader).iterator();
AuthorRow authorRow = null;
while (beanReader.hasNext() && (authorRow = beanReader.next()) != null) {
authors.put(authorRow.getShortName(), authorRow);
if (! authorRow.getShortName().equals(authorRow.getShortNameHtml())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortNameHtml());
if (! authorRow.getShortName().equals(authorRow.getShortName())) {
String unescaped = StringEscapeUtils.unescapeHtml4(authorRow.getShortName());
if (unescaped != null && ! unescaped.equals(authorRow.getShortName())) {
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
}
......@@ -235,24 +236,16 @@ public class WhatsWrong {
}
}
try (CSVReader reader = TaxonomyReader.openCsvReader(new InputStreamReader(new FileInputStream(speciesFile), "UTF-8"), 1)) {
String[] row = null;
while ((row = reader.readNext()) != null) {
try {
SpeciesRow speciesRow=TaxonomyReader.toSpecies(row);
try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(speciesFile), 0)) {
Iterator<SpeciesRow> beanReader = CabReader.beanReader(SpeciesRow.class, reader).iterator();
SpeciesRow speciesRow=null;
while (beanReader.hasNext() && (speciesRow = beanReader.next()) != null) {
checkAuthority(authors, "SPECIES_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSpeciesAuthority());
checkAuthority(authors, "SUBSPECIES_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSubspeciesAuthority());
checkAuthority(authors, "VARIETY_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getVarietyAuthority());
checkAuthority(authors, "SUBVARIETY_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getSubvarietyAuthority());
checkAuthority(authors, "FORMA_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getFormaAuthority());
checkAuthority(authors, "NAME_AUTHORITY", speciesRow.getSpeciesId(), speciesRow.getName(), speciesRow.getNameAuthority());
} catch (ParseException e) {
String x="\n";
for (String y : row) {
x+=y + "\n";
}
LOG.warn("{} in row:\n{}", e.getMessage(), x, e);
}
}
}
}
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment