Commit edb4cacf authored by Matija Obreza
Browse files

WIEWS export_c.txt format updated

parent b3d9dc34
...@@ -21,6 +21,7 @@ import static org.genesys2.util.NumberUtils.parseDoubleIgnore0; ...@@ -21,6 +21,7 @@ import static org.genesys2.util.NumberUtils.parseDoubleIgnore0;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -29,6 +30,7 @@ import java.util.zip.ZipEntry; ...@@ -29,6 +30,7 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream; import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
...@@ -49,6 +51,32 @@ import au.com.bytecode.opencsv.CSVReader; ...@@ -49,6 +51,32 @@ import au.com.bytecode.opencsv.CSVReader;
@Component @Component
public class InstituteUpdater { public class InstituteUpdater {
public static final String WIEWS_EXPORT_URL = "http://www.fao.org/wiews-archive/export_c.zip"; public static final String WIEWS_EXPORT_URL = "http://www.fao.org/wiews-archive/export_c.zip";
/**
 * Column headers of the WIEWS export_c.txt file, listed in file order. The header row read
 * from the download is compared against this array before any data row is processed.
 */
public static final String[] WIEWS_EXPORT_C_HEADERS = {
	"INSTCODE", "ACRONYM", "ECPACRONYM", "FULL_NAME", "TYPE",
	"GENEBANK_LONG_TERM_COLLECTIONS", "BOTANICAL_GARDEN",
	"GENEBANK_MEDIUM_TERM_COLLECTIONS", "GENEBANK_SHORT_TERM_COLLECTIONS",
	"STREET_POB", "CITY_STATE", "ZIP_CODE", "PHONE", "FAX",
	"EMAIL", "URL", "LATITUDE", "LONGITUDE", "ALTITUDE",
	"UPDATED_ON", "V_INSTCODE", "ISO3"
};

// Zero-based column positions. Each COL_X is the index of header "X" in WIEWS_EXPORT_C_HEADERS,
// so the two lists must be kept in sync whenever the export format changes.
public static final int COL_INSTCODE = 0;
public static final int COL_ACRONYM = 1;
public static final int COL_ECPACRONYM = 2;
public static final int COL_FULL_NAME = 3;
public static final int COL_TYPE = 4;
public static final int COL_GENEBANK_LONG_TERM_COLLECTIONS = 5;
public static final int COL_BOTANICAL_GARDEN = 6;
public static final int COL_GENEBANK_MEDIUM_TERM_COLLECTIONS = 7;
public static final int COL_GENEBANK_SHORT_TERM_COLLECTIONS = 8;
public static final int COL_STREET_POB = 9;
public static final int COL_CITY_STATE = 10;
public static final int COL_ZIP_CODE = 11;
public static final int COL_PHONE = 12;
public static final int COL_FAX = 13;
public static final int COL_EMAIL = 14;
public static final int COL_URL = 15;
public static final int COL_LATITUDE = 16;
public static final int COL_LONGITUDE = 17;
public static final int COL_ALTITUDE = 18;
public static final int COL_UPDATED_ON = 19;
public static final int COL_V_INSTCODE = 20;
public static final int COL_ISO3 = 21;
public static final Log LOG = LogFactory.getLog(InstituteUpdater.class); public static final Log LOG = LogFactory.getLog(InstituteUpdater.class);
...@@ -92,6 +120,7 @@ public class InstituteUpdater { ...@@ -92,6 +120,7 @@ public class InstituteUpdater {
// to bother about connection release // to bother about connection release
if (entity != null) { if (entity != null) {
ZipInputStream instream = null; ZipInputStream instream = null;
CSVReader reader = null;
try { try {
instream = new ZipInputStream(entity.getContent()); instream = new ZipInputStream(entity.getContent());
...@@ -103,7 +132,15 @@ public class InstituteUpdater { ...@@ -103,7 +132,15 @@ public class InstituteUpdater {
} }
final InputStreamReader inreader = new InputStreamReader(instream, "UTF-8"); final InputStreamReader inreader = new InputStreamReader(instream, "UTF-8");
final CSVReader reader = new CSVReader(inreader, ',', '"', false); reader = new CSVReader(inreader, ',', '"', false);
// Ensure headers match known format
final String[] headers = reader.readNext();
LOG.warn("export_c.txt headers: " + ArrayUtils.toString(headers, "<null>"));
if (WIEWS_EXPORT_C_HEADERS.length != headers.length || !Arrays.equals(WIEWS_EXPORT_C_HEADERS, headers)) {
throw new InstituteUpdateException("export_c.txt headers mismatch: " + ArrayUtils.toString(headers, "<null>"));
}
final Map<String, String[]> batch = new HashMap<String, String[]>(BATCH_SIZE); final Map<String, String[]> batch = new HashMap<String, String[]>(BATCH_SIZE);
...@@ -118,7 +155,7 @@ public class InstituteUpdater { ...@@ -118,7 +155,7 @@ public class InstituteUpdater {
// LOG.info(ArrayUtils.toString(line)); // LOG.info(ArrayUtils.toString(line));
// } // }
final String instCode = line[0]; final String instCode = line[COL_INSTCODE];
batch.put(instCode, line); batch.put(instCode, line);
if (batch.size() == BATCH_SIZE) { if (batch.size() == BATCH_SIZE) {
...@@ -135,9 +172,11 @@ public class InstituteUpdater { ...@@ -135,9 +172,11 @@ public class InstituteUpdater {
} catch (final RuntimeException ex) { } catch (final RuntimeException ex) {
LOG.error(ex.getMessage(), ex); LOG.error(ex.getMessage(), ex);
httpget.abort(); httpget.abort();
} catch (InstituteUpdateException e) {
throw new IOException(e);
} finally { } finally {
// Closing the input stream will trigger connection IOUtils.closeQuietly(reader);
// release
IOUtils.closeQuietly(instream); IOUtils.closeQuietly(instream);
} }
} }
...@@ -201,27 +240,27 @@ public class InstituteUpdater { ...@@ -201,27 +240,27 @@ public class InstituteUpdater {
} }
private boolean updateData(FaoInstitute faoInstitute, String[] line) { private boolean updateData(FaoInstitute faoInstitute, String[] line) {
final String instCode = line[0]; final String instCode = line[COL_INSTCODE];
final String acronym = line[1]; final String acronym = line[COL_ACRONYM];
// final String ecpaAcronym = line[2]; // final String ecpaAcronym = line[COL_ECPACRONYM];
final String fullName = line[3]; final String fullName = line[COL_FULL_NAME];
final String type = line[4]; final String type = line[COL_TYPE];
final boolean pgrActivity = "Y".equals(line[5]); final boolean maintColl = "1".equals(line[COL_GENEBANK_LONG_TERM_COLLECTIONS]) || "1".equals(line[COL_GENEBANK_MEDIUM_TERM_COLLECTIONS])
final boolean maintColl = "Y".equals(line[6]); || "1".equals(line[COL_GENEBANK_SHORT_TERM_COLLECTIONS]);
// final String streetPob = line[7]; final boolean pgrActivity = maintColl || "1".equals(line[COL_BOTANICAL_GARDEN]);
// final String cityState = line[8]; // final String streetPob = line[COL_STREET_POB];
// final String zipCode = line[9]; // final String cityState = line[COL_CITY_STATE];
// final String phone = line[10]; // final String zipCode = line[COL_ZIP_CODE];
// final String fax = line[11]; // final String phone = line[COL_PHONE];
final String email = line[12]; // final String fax = line[COL_FAX];
final String url = line[13]; final String email = line[COL_EMAIL];
final String latitude = line[14]; final String url = line[COL_URL];
final String longitude = line[15]; final String latitude = line[COL_LATITUDE];
final String elevation = line[16]; final String longitude = line[COL_LONGITUDE];
// final String updatedOn = line[17]; final String elevation = line[COL_ALTITUDE];
// V_INSTCODE === New instcode? // final String updatedOn = line[COL_UPDATED_ON];
final String vInstCode = line[18]; final String vInstCode = line[COL_V_INSTCODE];
final String isoCountry = line[19]; final String isoCountry = line[COL_ISO3];
if (faoInstitute.getCode() == null) { if (faoInstitute.getCode() == null) {
faoInstitute.setCode(instCode); faoInstitute.setCode(instCode);
...@@ -251,4 +290,13 @@ public class InstituteUpdater { ...@@ -251,4 +290,13 @@ public class InstituteUpdater {
} }
}); });
} }
/**
 * Checked exception raised when an institute update cannot be carried out, for example when
 * the header row of export_c.txt does not match the expected column layout.
 */
public static class InstituteUpdateException extends Exception {
	private static final long serialVersionUID = 1L;

	/**
	 * Creates an exception with a description of the failure.
	 *
	 * @param message human-readable description of what went wrong
	 */
	public InstituteUpdateException(String message) {
		super(message);
	}

	/**
	 * Creates an exception that keeps the underlying failure attached, so the original
	 * stack trace is not lost when the error is rethrown or wrapped.
	 *
	 * @param message human-readable description of what went wrong
	 * @param cause the underlying failure; may be {@code null} if unknown
	 */
	public InstituteUpdateException(String message, Throwable cause) {
		super(message, cause);
	}
}
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment