Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Genesys PGR
validator.genesys-pgr.org
Commits
b35b774a
Commit
b35b774a
authored
Jun 14, 2020
by
Matija Obreza
Browse files
taxonomy-tools:1.3-SNAPSHOT with new CSV readers
parent
bc104c20
Changes
3
Hide whitespace changes
Inline
Side-by-side
pom.xml
View file @
b35b774a
...
...
@@ -18,7 +18,7 @@
<jdk.source>
1.8
</jdk.source>
<commons-lang3.version>
3.3.2
</commons-lang3.version>
<genesys-geotools.version>
1.1-SNAPSHOT
</genesys-geotools.version>
<taxonomy-tools.version>
1.
2
-SNAPSHOT
</taxonomy-tools.version>
<taxonomy-tools.version>
1.
3
-SNAPSHOT
</taxonomy-tools.version>
</properties>
<licenses>
...
...
src/main/java/org/genesys/taxonomy/checker/web/service/impl/TaxonomyProcessServiceImpl.java
View file @
b35b774a
...
...
@@ -19,7 +19,6 @@ import java.io.File;
import
java.io.FileInputStream
;
import
java.io.FileNotFoundException
;
import
java.io.IOException
;
import
java.io.InputStreamReader
;
import
java.io.Reader
;
import
java.io.UnsupportedEncodingException
;
import
java.text.ParseException
;
...
...
@@ -27,6 +26,7 @@ import java.util.ArrayList;
import
java.util.Arrays
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.Iterator
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Map
;
...
...
@@ -38,7 +38,7 @@ import org.genesys.taxonomy.checker.TaxonomyChecker;
import
org.genesys.taxonomy.checker.TaxonomyException
;
import
org.genesys.taxonomy.checker.web.service.ProcessService
;
import
org.genesys.taxonomy.checker.web.util.ApplicationUtils
;
import
org.genesys.taxonomy.gringlobal.component.
Taxonomy
Reader
;
import
org.genesys.taxonomy.gringlobal.component.
Cab
Reader
;
import
org.genesys.taxonomy.gringlobal.model.GenusRow
;
import
org.genesys.taxonomy.gringlobal.model.SpeciesRow
;
import
org.slf4j.Logger
;
...
...
@@ -99,18 +99,20 @@ public class TaxonomyProcessServiceImpl implements ProcessService {
LOG
.
info
(
"Loading taxonomy_genus.txt"
);
// read taxonomy_genus.txt
try
(
CSVReader
reader
=
TaxonomyReader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
new
File
(
rootDir
,
"taxonomy_genus.txt"
)),
"UTF-8"
),
1
))
{
try
(
CSVReader
reader
=
CabReader
.
openCsvReader
(
new
FileInputStream
(
new
File
(
rootDir
,
"taxonomy_genus.txt"
)),
0
))
{
Iterator
<
GenusRow
>
beanReader
=
CabReader
.
beanReader
(
GenusRow
.
class
,
reader
).
iterator
();
GenusRow
genusRow
=
null
;
while
(
(
genusRow
=
TaxonomyReader
.
toGenus
(
reader
.
readN
ext
())
)
!=
null
)
{
while
(
beanReader
.
hasNext
()
&&
(
genusRow
=
beanReader
.
n
ext
())
!=
null
)
{
taxonomyDatabase
.
registerGenus
(
genusRow
.
getGenusId
(),
genusRow
.
getGenusName
());
}
}
LOG
.
info
(
"Loading taxonomy_species.txt"
);
// read taxonomy_species.txt
try
(
CSVReader
reader
=
TaxonomyReader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
new
File
(
rootDir
,
"taxonomy_species.txt"
)),
"UTF-8"
),
1
))
{
try
(
CSVReader
reader
=
CabReader
.
openCsvReader
(
new
FileInputStream
(
new
File
(
rootDir
,
"taxonomy_species.txt"
)),
0
))
{
Iterator
<
SpeciesRow
>
beanReader
=
CabReader
.
beanReader
(
SpeciesRow
.
class
,
reader
).
iterator
();
SpeciesRow
speciesRow
=
null
;
while
(
(
speciesRow
=
TaxonomyReader
.
toSpecies
(
r
eader
.
readN
ext
())
)
!=
null
)
{
while
(
beanReader
.
hasNext
()
&&
(
speciesRow
=
beanR
eader
.
n
ext
())
!=
null
)
{
taxonomyDatabase
.
registerSpecies
(
speciesRow
);
}
}
...
...
@@ -331,7 +333,7 @@ public class TaxonomyProcessServiceImpl implements ProcessService {
outputLine
[
outputHeaders
.
indexOf
(
ApplicationUtils
.
HEADER_GRINTAX_SPECIESCURRENT
)]
=
Boolean
.
toString
(
speciesRow
.
isCurrent
());
if
(!
speciesRow
.
isCurrent
()
&&
toCurrentTaxa
)
{
LOG
.
debug
(
"{} is not current"
,
speciesRow
);
final
SpeciesRow
currentSpecies
=
taxonomyDatabase
.
getSpeciesRow
(
speciesRow
.
getCurrentSpeciesId
());
final
SpeciesRow
currentSpecies
=
taxonomyDatabase
.
getSpeciesRow
(
speciesRow
.
getCurrent
Taxonomy
SpeciesId
());
final
String
currentGenus
=
taxonomyDatabase
.
getGenus
(
currentSpecies
.
getGenusId
());
// LOG.warn("Result: {}", Arrays.toString(outputLine));
updateOutputColumn
(
outputLine
,
outputHeaders
.
indexOf
(
ApplicationUtils
.
HEADER_GENUS_CHECK
),
currentGenus
,
genus
,
ApplicationUtils
.
CURRENT_SUFFIX
);
...
...
src/test/java/org/genesys/grin/WhatsWrong.java
View file @
b35b774a
...
...
@@ -3,11 +3,11 @@ package org.genesys.grin;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.io.InputStreamReader
;
import
java.text.ParseException
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashSet
;
import
java.util.Iterator
;
import
java.util.Map
;
import
java.util.Set
;
...
...
@@ -15,7 +15,7 @@ import org.apache.commons.lang3.StringEscapeUtils;
import
org.apache.commons.lang3.StringUtils
;
import
org.genesys.taxonomy.checker.web.config.ApplicationConfig
;
import
org.genesys.taxonomy.download.TaxonomyDownloader
;
import
org.genesys.taxonomy.gringlobal.component.
Taxonomy
Reader
;
import
org.genesys.taxonomy.gringlobal.component.
Cab
Reader
;
import
org.genesys.taxonomy.gringlobal.model.AuthorRow
;
import
org.genesys.taxonomy.gringlobal.model.GenusRow
;
import
org.genesys.taxonomy.gringlobal.model.SpeciesRow
;
...
...
@@ -58,7 +58,7 @@ public class WhatsWrong {
TaxonomyDownloader
.
unpackCabinetFile
(
downloadedCabFile
,
dataFolder
,
false
);
}
try
(
CSVReader
reader
=
Taxonomy
Reader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
taxonomyAuthor
),
"UTF-8"
),
1
))
{
try
(
CSVReader
reader
=
Cab
Reader
.
openCsvReader
(
new
FileInputStream
(
taxonomyAuthor
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!=
null
)
{
String
name
=
row
[
1
];
...
...
@@ -99,13 +99,14 @@ public class WhatsWrong {
Map
<
String
,
AuthorRow
>
authors
=
new
HashMap
<>();
try
(
CSVReader
reader
=
TaxonomyReader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
taxonomyAuthor
),
"UTF-8"
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!=
null
)
{
AuthorRow
authorRow
=
TaxonomyReader
.
toAuthor
(
row
);
try
(
CSVReader
reader
=
CabReader
.
openCsvReader
(
new
FileInputStream
(
taxonomyAuthor
),
0
))
{
Iterator
<
AuthorRow
>
beanReader
=
CabReader
.
beanReader
(
AuthorRow
.
class
,
reader
).
iterator
();
AuthorRow
authorRow
=
null
;
while
(
beanReader
.
hasNext
()
&&
(
authorRow
=
beanReader
.
next
())
!=
null
)
{
authors
.
put
(
authorRow
.
getShortName
(),
authorRow
);
if
(!
authorRow
.
getShortName
().
equals
(
authorRow
.
getShortName
Html
()))
{
String
unescaped
=
StringEscapeUtils
.
unescapeHtml4
(
authorRow
.
getShortName
Html
());
if
(!
authorRow
.
getShortName
().
equals
(
authorRow
.
getShortName
()))
{
String
unescaped
=
StringEscapeUtils
.
unescapeHtml4
(
authorRow
.
getShortName
());
if
(
unescaped
!=
null
&&
!
unescaped
.
equals
(
authorRow
.
getShortName
()))
{
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
}
...
...
@@ -116,7 +117,7 @@ public class WhatsWrong {
Set
<
String
>
authorCache
=
new
HashSet
<>();
try
(
CSVReader
reader
=
Taxonomy
Reader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
familyFile
),
"UTF-8"
),
1
))
{
try
(
CSVReader
reader
=
Cab
Reader
.
openCsvReader
(
new
FileInputStream
(
familyFile
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!=
null
)
{
String
authorName
=
row
[
6
];
...
...
@@ -155,13 +156,13 @@ public class WhatsWrong {
Map
<
String
,
AuthorRow
>
authors
=
new
HashMap
<>();
try
(
CSVReader
reader
=
Taxonomy
Reader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
taxonomyAuthor
),
"UTF-8"
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!
=
null
)
{
AuthorRow
authorRow
=
TaxonomyReader
.
toAuthor
(
row
);
try
(
CSVReader
reader
=
Cab
Reader
.
openCsvReader
(
new
FileInputStream
(
taxonomyAuthor
),
0
))
{
Iterator
<
AuthorRow
>
beanReader
=
CabReader
.
beanReader
(
AuthorRow
.
class
,
reader
).
iterator
()
;
AuthorRow
authorRow
=
null
;
while
(
beanReader
.
hasNext
()
&&
(
authorRow
=
beanReader
.
next
())
!=
null
)
{
authors
.
put
(
authorRow
.
getShortName
(),
authorRow
);
if
(!
authorRow
.
getShortName
().
equals
(
authorRow
.
getShortName
Html
()))
{
String
unescaped
=
StringEscapeUtils
.
unescapeHtml4
(
authorRow
.
getShortName
Html
());
if
(!
authorRow
.
getShortName
().
equals
(
authorRow
.
getShortName
()))
{
String
unescaped
=
StringEscapeUtils
.
unescapeHtml4
(
authorRow
.
getShortName
());
if
(
unescaped
!=
null
&&
!
unescaped
.
equals
(
authorRow
.
getShortName
()))
{
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
}
...
...
@@ -172,10 +173,10 @@ public class WhatsWrong {
Set
<
String
>
authorCache
=
new
HashSet
<>();
try
(
CSVReader
reader
=
Taxonomy
Reader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
genusFile
),
"UTF-8"
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!=
null
)
{
GenusRow
genusRow
=
TaxonomyReader
.
toGenus
(
row
);
try
(
CSVReader
reader
=
Cab
Reader
.
openCsvReader
(
new
FileInputStream
(
genusFile
),
0
))
{
Iterator
<
GenusRow
>
beanReader
=
CabReader
.
beanReader
(
GenusRow
.
class
,
reader
).
iterator
()
;
GenusRow
genusRow
=
null
;
while
(
beanReader
.
hasNext
()
&&
(
genusRow
=
beanReader
.
next
())
!=
null
)
{
if
(
authorCache
.
contains
(
genusRow
.
getGenusAuthority
()))
{
continue
;
...
...
@@ -186,9 +187,9 @@ public class WhatsWrong {
for
(
String
name
:
authorNames
)
{
AuthorRow
authorRow
=
authors
.
get
(
name
);
if
(
authorRow
==
null
)
{
LOG
.
warn
(
"No author name\t{}\tin authority\t{}\tfor genus\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}"
,
name
,
genusRow
.
getGenusAuthority
(),
genusRow
.
getGenusName
(),
genusRow
.
getGenusId
(),
genusRow
.
is
Current
());
LOG
.
warn
(
"No author name\t{}\tin authority\t{}\tfor genus\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}"
,
name
,
genusRow
.
getGenusAuthority
(),
genusRow
.
getGenusName
(),
genusRow
.
getGenusId
(),
genusRow
.
get
Current
TaxonomyGenusId
());
}
else
if
(!
name
.
equals
(
authorRow
.
getShortName
()))
{
LOG
.
warn
(
"Genus\t{}\tauthority\t{}={}\tshould be\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}"
,
genusRow
.
getGenusName
(),
genusRow
.
getGenusAuthority
(),
authorRow
.
getShortName
(),
authorRow
.
getShortName
Html
(),
genusRow
.
getGenusId
(),
genusRow
.
is
Current
());
LOG
.
warn
(
"Genus\t{}\tauthority\t{}={}\tshould be\t{}\tin TAXONOMY_GENUS_ID=\t{}\t{}"
,
genusRow
.
getGenusName
(),
genusRow
.
getGenusAuthority
(),
authorRow
.
getShortName
(),
authorRow
.
getShortName
(),
genusRow
.
getGenusId
(),
genusRow
.
get
Current
TaxonomyGenusId
());
}
}
authorCache
.
add
(
genusRow
.
getGenusAuthority
());
...
...
@@ -220,13 +221,13 @@ public class WhatsWrong {
Map
<
String
,
AuthorRow
>
authors
=
new
HashMap
<>();
try
(
CSVReader
reader
=
Taxonomy
Reader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
taxonomyAuthor
),
"UTF-8"
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!
=
null
)
{
AuthorRow
authorRow
=
TaxonomyReader
.
toAuthor
(
row
);
try
(
CSVReader
reader
=
Cab
Reader
.
openCsvReader
(
new
FileInputStream
(
taxonomyAuthor
),
0
))
{
Iterator
<
AuthorRow
>
beanReader
=
CabReader
.
beanReader
(
AuthorRow
.
class
,
reader
).
iterator
()
;
AuthorRow
authorRow
=
null
;
while
(
beanReader
.
hasNext
()
&&
(
authorRow
=
beanReader
.
next
())
!=
null
)
{
authors
.
put
(
authorRow
.
getShortName
(),
authorRow
);
if
(!
authorRow
.
getShortName
().
equals
(
authorRow
.
getShortName
Html
()))
{
String
unescaped
=
StringEscapeUtils
.
unescapeHtml4
(
authorRow
.
getShortName
Html
());
if
(!
authorRow
.
getShortName
().
equals
(
authorRow
.
getShortName
()))
{
String
unescaped
=
StringEscapeUtils
.
unescapeHtml4
(
authorRow
.
getShortName
());
if
(
unescaped
!=
null
&&
!
unescaped
.
equals
(
authorRow
.
getShortName
()))
{
// LOG.warn("Author {} = {} should be {} in TAXONOMY_AUTHOR_ID={}", authorRow.getShortName(), authorRow.htmlName, unescaped, authorRow.authorId);
}
...
...
@@ -235,24 +236,16 @@ public class WhatsWrong {
}
}
try
(
CSVReader
reader
=
TaxonomyReader
.
openCsvReader
(
new
InputStreamReader
(
new
FileInputStream
(
speciesFile
),
"UTF-8"
),
1
))
{
String
[]
row
=
null
;
while
((
row
=
reader
.
readNext
())
!=
null
)
{
try
{
SpeciesRow
speciesRow
=
TaxonomyReader
.
toSpecies
(
row
);
checkAuthority
(
authors
,
"SPECIES_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getSpeciesAuthority
());
checkAuthority
(
authors
,
"SUBSPECIES_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getSubspeciesAuthority
());
checkAuthority
(
authors
,
"VARIETY_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getVarietyAuthority
());
checkAuthority
(
authors
,
"SUBVARIETY_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getSubvarietyAuthority
());
checkAuthority
(
authors
,
"FORMA_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getFormaAuthority
());
checkAuthority
(
authors
,
"NAME_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getNameAuthority
());
}
catch
(
ParseException
e
)
{
String
x
=
"\n"
;
for
(
String
y
:
row
)
{
x
+=
y
+
"\n"
;
}
LOG
.
warn
(
"{} in row:\n{}"
,
e
.
getMessage
(),
x
,
e
);
}
try
(
CSVReader
reader
=
CabReader
.
openCsvReader
(
new
FileInputStream
(
speciesFile
),
0
))
{
Iterator
<
SpeciesRow
>
beanReader
=
CabReader
.
beanReader
(
SpeciesRow
.
class
,
reader
).
iterator
();
SpeciesRow
speciesRow
=
null
;
while
(
beanReader
.
hasNext
()
&&
(
speciesRow
=
beanReader
.
next
())
!=
null
)
{
checkAuthority
(
authors
,
"SPECIES_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getSpeciesAuthority
());
checkAuthority
(
authors
,
"SUBSPECIES_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getSubspeciesAuthority
());
checkAuthority
(
authors
,
"VARIETY_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getVarietyAuthority
());
checkAuthority
(
authors
,
"SUBVARIETY_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getSubvarietyAuthority
());
checkAuthority
(
authors
,
"FORMA_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getFormaAuthority
());
checkAuthority
(
authors
,
"NAME_AUTHORITY"
,
speciesRow
.
getSpeciesId
(),
speciesRow
.
getName
(),
speciesRow
.
getNameAuthority
());
}
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment