Commit fb5a5b12 authored by Matija Obreza
Browse files

ITPGRFA workarounds

parent cd8c6cc6
......@@ -52,7 +52,7 @@ public interface CountryRepository extends JpaRepository<Country, Long> {
@Query("select distinct c.refnameId from Country c where c.refnameId is not null")
List<Long> listRefnameIds();
@Query("select distinct itpgrfa.country from ITPGRFAStatus itpgrfa where itpgrfa.contractingParty != 'No' and itpgrfa.country.current = true")
@Query("select distinct itpgrfa.country from ITPGRFAStatus itpgrfa where itpgrfa.contractingParty = 'Yes'")
List<Country> findITPGRFA();
@Query("select distinct c from Country c where c.nameL like :pattern")
......
......@@ -21,9 +21,10 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
......@@ -67,6 +68,29 @@ public class ITPGRFAStatusUpdater {
private static final int BATCH_SIZE = 20;
// FIXME Need ISO CODE3 in ITPGRFA CSV file!!
private static final Map<String, String> workarounds = new HashMap<String, String>();
private static final String[] CSV_HEADERS = { "Country", "FAO Region 1", "FAO Region 2", "CP", "Name of NFP", "Membership", "by", "Income", "Development" };
static {
workarounds.put("Democratic People's Republic of Korea", "PRK");
workarounds.put("Iran (Islamic Republic of)", "IRN");
workarounds.put("Cote d'Ivoire", "CIV");
workarounds.put("Micronesia (Federated States of)", "FSM");
workarounds.put("Republic of Moldova", "MDA");
workarounds.put("Lao People's Democratic Republic", "LAO");
workarounds.put("Republic of Korea", "KOR");
workarounds.put("Syrian Arab Republic", "SYR");
workarounds.put("The former Yugoslav Republic of Macedonia", "MKD");
workarounds.put("Russian Federation", "RUS");
workarounds.put("United Republic of Tanzania", "TZA");
workarounds.put("United States of America", "USA");
workarounds.put("Venezuela (Bolivarian Republic of)", "VEN");
workarounds.put("Viet Nam", "VNM");
LOG.warn("There are still " + workarounds.size() + " hardcoded workarounds for country names in ITPGRFA CSV file!");
}
/**
* Update local {@link ITPGRFAStatus} entries with data from CSV
*
......@@ -86,8 +110,7 @@ public class ITPGRFAStatusUpdater {
LOG.debug(entity.getContentType() + " " + entity.getContentLength());
// If the response does not enclose an entity, there is no
// need
// to bother about connection release
// need to bother about connection release
if (entity != null) {
itpgrfaCSVStream = new BufferedInputStream(entity.getContent());
}
......@@ -109,12 +132,25 @@ public class ITPGRFAStatusUpdater {
}
private void updateFromStream(InputStream instream) throws IOException {
CSVReader reader = new CSVReader(new BufferedReader(new InputStreamReader(instream)), ',', '"', '\\', 1, true, true);
CSVReader reader = new CSVReader(new BufferedReader(new InputStreamReader(instream)), ',', '"', '\\', 0, false, true);
try {
final List<String[]> batch = new ArrayList<String[]>(BATCH_SIZE);
String[] line = null;
// Get headers
String[] line = reader.readNext();
LOG.warn("Got headers: " + ArrayUtils.toString(line));
if (CSV_HEADERS.length > line.length) {
reader.close();
throw new IOException("CSV header count mismatch. Found: " + ArrayUtils.toString(line));
}
for (int i = CSV_HEADERS.length - 1; i >= 0; i--) {
if (!line[i].equals(CSV_HEADERS[i])) {
reader.close();
throw new IOException("CSV header mismatch, found '" + line[i] + "' instead of '" + CSV_HEADERS[i] + "'");
}
}
// Timer
StopWatch stopWatch = new StopWatch();
......@@ -132,13 +168,19 @@ public class ITPGRFAStatusUpdater {
continue;
}
if (batch.size() < BATCH_SIZE) {
batch.add(line);
} else {
if (workarounds.containsKey(line[0])) {
LOG.warn("Replacing " + line[0] + " with " + workarounds.get(line[0]));
line[0] = workarounds.get(line[0]);
}
LOG.warn(">>> " + ArrayUtils.toString(line, "NULL"));
batch.add(line);
if (batch.size() >= BATCH_SIZE) {
workIt(batch);
batch.clear();
}
}
if (batch.size() > 0) {
......@@ -150,10 +192,8 @@ public class ITPGRFAStatusUpdater {
stopWatch.stop();
LOG.info("Done importing ITPGRFA status in " + stopWatch.getTime() + "ms");
} catch (UnsupportedEncodingException e) {
LOG.error(e.getMessage(), e);
} finally {
IOUtils.closeQuietly(instream);
IOUtils.closeQuietly(reader);
}
}
......@@ -180,8 +220,7 @@ public class ITPGRFAStatusUpdater {
Country country = geoService.getCountry(countryName);
if (country == null) {
if (LOG.isDebugEnabled())
LOG.warn("No country with name=" + countryName);
LOG.error("No country with name=" + countryName);
return;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment