Commit 9b83c574, authored by Viacheslav Pavlov, committed by Matija Obreza

ISO3166 and WIEWS vocabularies sourced from Genesys

- added tests
parent 4d84d753
......@@ -17,13 +17,10 @@ package org.genesys.catalog.api.v0;
import java.io.IOException;
import java.util.List;
import java.util.UUID;
import org.genesys.catalog.model.vocab.VocabularyTerm;
import org.genesys.catalog.service.VocabularyService;
import org.genesys.catalog.service.worker.ISO3166VocabularyUpdater;
import org.genesys2.server.api.ApiBaseController;
import org.genesys2.server.exception.NotFoundElement;
import org.genesys2.server.service.GeoService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
......@@ -46,46 +43,17 @@ import io.swagger.annotations.ApiOperation;
@PreAuthorize("isAuthenticated()")
@Api(tags = { "geo" })
public class GeoController {
/** The Constant API_BASE. */
public static final String CONTROLLER_URL = ApiBaseController.APIv0_BASE + "/geo";
/** The Constant ISO3166_2ALPHA. */
public static final UUID ISO3166_2ALPHA = ISO3166VocabularyUpdater.ISO3166_2ALPHA;
/** The Constant ISO3166_3ALPHA. */
public static final UUID ISO3166_3ALPHA = ISO3166VocabularyUpdater.ISO3166_3ALPHA;
/** The Constant ISO3166_NUMERIC. */
public static final UUID ISO3166_NUMERIC = ISO3166VocabularyUpdater.ISO3166_NUMERIC;
private static final Logger LOG = LoggerFactory.getLogger(GeoController.class);
@Autowired
private ISO3166VocabularyUpdater iso3166VocabularyUpdater;
@Autowired
private VocabularyService vocabularyService;
/**
* Update countries codes.
*
* @return the string
* @throws IOException Signals that an I/O exception has occurred.
*/
@PreAuthorize("hasRole('ADMINISTRATOR')")
@PostMapping(value = "/update")
@ApiOperation("Triggers update of ISO country code vocabularies")
public @ResponseBody String updateCountriesCodes() throws IOException {
LOG.info("Updating ISO country codes");
vocabularyService.autoUpdateOrCreateVocabulary(ISO3166_2ALPHA, iso3166VocabularyUpdater.getISO3166Alpha2Vocabulary());
vocabularyService.autoUpdateOrCreateVocabulary(ISO3166_3ALPHA, iso3166VocabularyUpdater.getISO3166Alpha3Vocabulary());
vocabularyService.autoUpdateOrCreateVocabulary(ISO3166_NUMERIC, iso3166VocabularyUpdater.getISO3166NumericVocabulary());
return "OK";
}
private GeoService geoService;
/**
* Gets the.
* Gets the country vocabulary term
*
* @param code the code
* @return the vocabulary term
......@@ -93,17 +61,7 @@ public class GeoController {
@GetMapping(value = "/iso3166/{code}", produces = MediaType.APPLICATION_JSON_VALUE)
@ApiOperation("Lookup ISO-3166 country by code")
public VocabularyTerm get(@PathVariable("code") final String code) {
final boolean isNumeric = code.chars().allMatch(Character::isDigit);
if (isNumeric) {
return vocabularyService.getVocabularyTerm(ISO3166_NUMERIC, code);
} else if (code.length() == 2) {
return vocabularyService.getVocabularyTerm(ISO3166_2ALPHA, code);
} else if (code.length() == 3) {
return vocabularyService.getVocabularyTerm(ISO3166_3ALPHA, code);
}
throw new NotFoundElement("Code is not in valid format: 3-letter | 2-letter | numeric");
return geoService.getCountryTerm(code);
}
/**
......@@ -115,9 +73,7 @@ public class GeoController {
*/
@GetMapping(value = "/iso3166/autocomplete", produces = MediaType.APPLICATION_JSON_VALUE)
public List<VocabularyTerm> autocompleteGeoTerm(@RequestParam("c") final String text) throws IOException {
return vocabularyService.autocompleteTerms(ISO3166_3ALPHA, text);
return geoService.autoCompleteTerm(text);
}
}
......@@ -18,12 +18,11 @@ package org.genesys.catalog.api.v0;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
import org.genesys.catalog.model.vocab.VocabularyTerm;
import org.genesys.catalog.service.VocabularyService;
import org.genesys.catalog.service.worker.WiewsVocabularyUpdater;
import org.genesys2.server.api.ApiBaseController;
import org.genesys2.server.service.InstituteService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
......@@ -31,10 +30,8 @@ import org.springframework.http.MediaType;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import io.swagger.annotations.Api;
......@@ -55,30 +52,12 @@ public class WiewsController {
/** The Constant API_BASE. */
public static final String CONTROLLER_URL = ApiBaseController.APIv0_BASE + "/wiews";
/** The Constant FAO_WIEWS_UUID. */
public static final UUID FAO_WIEWS_UUID = WiewsVocabularyUpdater.FAO_WIEWS_UUID;
private static final Logger LOG = LoggerFactory.getLogger(WiewsController.class);
@Autowired
private VocabularyService vocabularyService;
private InstituteService instituteService;
@Autowired
private WiewsVocabularyUpdater wiewsVocabularyUpdater;
/**
* Update languages.
*
* @return the string
* @throws IOException Signals that an I/O exception has occurred.
*/
@PreAuthorize("hasRole('ADMINISTRATOR')")
@PostMapping(value = "/update")
public @ResponseBody String updateWiewsVocabulary() throws IOException {
LOG.info("Updating FAO WIEWS codes");
vocabularyService.autoUpdateOrCreateVocabulary(FAO_WIEWS_UUID, wiewsVocabularyUpdater.getWiewsVocabulary());
return "OK";
}
/**
* Gets the.
......@@ -88,7 +67,7 @@ public class WiewsController {
*/
@GetMapping(value = "/{code}", produces = MediaType.APPLICATION_JSON_VALUE)
public VocabularyTerm getWiewsTerm(@PathVariable("code") final String code) {
return vocabularyService.getVocabularyTerm(FAO_WIEWS_UUID, code);
return instituteService.getInstituteTerm(code);
}
/**
......@@ -104,6 +83,6 @@ public class WiewsController {
return Collections.emptyList();
}
return vocabularyService.autocompleteTerms(FAO_WIEWS_UUID, text);
return instituteService.autocompleteTerm(text);
}
}
/*
* Copyright 2017 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.worker;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
/**
 * Fetches and parses ISO-3166 country information published at
 * http://www.davros.org/misc/iso3166.txt
 *
 * @author Matija Obreza
 * @author Maxym Borodenko
 */
@Component("davrosVocabularySource")
public class DavrosCountrySource {

    /** Location of the plain-text ISO-3166 code list. */
    public static final String DAVROS_ISO3166_URL = "http://www.davros.org/misc/iso3166.txt";

    /** The Constant LOG. */
    public static final Logger LOG = LoggerFactory.getLogger(DavrosCountrySource.class);

    /**
     * Downloads the code list from {@link #DAVROS_ISO3166_URL} and parses every
     * data line into a {@link CountryInfo}.
     *
     * @return distinct country records, ordered by their 3-letter code
     * @throws IOException if the request fails, the server answers with a non-2xx
     *         status or without a body, or a line cannot be parsed
     */
    public List<CountryInfo> fetchCountryData() throws IOException {
        final HttpGet httpget = new HttpGet(DAVROS_ISO3166_URL);

        try (CloseableHttpClient httpclient = HttpClientBuilder.create().build()) {
            final HttpResponse response = httpclient.execute(httpget);
            LOG.debug("HTTP Response status: {}", response.getStatusLine());

            // An error page must not be parsed as if it were the country table
            final int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IOException("Unexpected HTTP status " + status + " from " + DAVROS_ISO3166_URL);
            }

            final HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("No content in response from " + DAVROS_ISO3166_URL);
            }
            LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());

            // NOTE(review): the reader uses the platform default charset, as before.
            // Confirm the encoding served by davros.org and make it explicit.
            try (InputStream instream = entity.getContent();
                    BufferedReader inreader = new BufferedReader(new InputStreamReader(instream))) {
                final List<CountryInfo> countries = readCountries(inreader);
                LOG.info("Returning {} countries from Davros", countries.size());
                return countries.stream().sorted((a, b) -> a.getCode3().compareTo(b.getCode3())).collect(Collectors.toList());
            }
        } catch (final ClientProtocolException e) {
            LOG.error(e.getMessage(), e);
            throw new IOException("Could not execute HTTP request: " + e.getMessage(), e);
        } catch (final RuntimeException ex) {
            // Parsing problems surface as unchecked exceptions; report them as I/O failures
            LOG.error(ex.getMessage(), ex);
            httpget.abort();
            throw new IOException(ex);
        } finally {
            LOG.info("Done fetching country info from davros.org");
        }
    }

    /**
     * Reads the code list line by line, tracking whether the current table holds
     * active or withdrawn codes, and collects each distinct record once.
     *
     * @param reader source of the code list text
     * @return parsed records in the order they were read
     * @throws IOException if reading from {@code reader} fails
     */
    private static List<CountryInfo> readCountries(final BufferedReader reader) throws IOException {
        final List<CountryInfo> countries = new ArrayList<>();
        boolean active = true;
        String line;
        while ((line = reader.readLine()) != null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace(line);
            }
            if (line.startsWith("# Table 1: current codes")) {
                active = true;
            } else if (line.startsWith("# Table 2: codes withdrawn from use")) {
                active = false;
            } else if (line.trim().isEmpty() || line.startsWith("#")) {
                // blank line or comment: nothing to parse
                continue;
            } else {
                final CountryInfo countryInfo = parseLine(line, active);
                // relies on CountryInfo#equals to drop repeated records
                if (!countries.contains(countryInfo)) {
                    countries.add(countryInfo);
                }
            }
        }
        return countries;
    }

    /**
     * Parses one fixed-width data line. The first 10 characters hold up to three
     * space-separated codes (2-letter, 3-letter, numeric); everything from
     * character 11 onward is taken as the name, for example:
     *
     * <pre>
     * BQ ATB     British Antarctic Territory GB GBR 826 United Kingdom of Great Britain and N. Ireland
     * </pre>
     *
     * @param line the data line; must be at least 11 characters long
     * @param active whether the line belongs to the table of current codes
     * @return parsed CountryInfo
     * @throws IndexOutOfBoundsException if the line is shorter than 11 characters
     *         or its code columns do not split into three fields
     */
    public static CountryInfo parseLine(final String line, final boolean active) {
        final String a = line.substring(0, 10);
        final String b = line.substring(11);
        LOG.trace("Davros a={} b={}", a, b);
        // limit 3 keeps a trailing empty field when the numeric code is missing
        final String[] codes = a.split(" +", 3);
        return new CountryInfo(codes[0], codes[1], codes[2], b, active);
    }

    /**
     * Immutable record of one line of the Davros ISO-3166 list.
     */
    public static class CountryInfo {
        private final boolean active;
        private final String code2;
        private final String code3;
        private final String codeNum;
        private final String countryName;

        /**
         * Instantiates a new country info.
         *
         * @param code2 the 2-letter code
         * @param code3 the 3-letter code
         * @param codeNum the numeric code
         * @param countryName the country name
         * @param active whether the code is currently in use
         */
        public CountryInfo(final String code2, final String code3, final String codeNum, final String countryName, final boolean active) {
            this.code2 = code2;
            this.code3 = code3;
            this.codeNum = codeNum;
            this.countryName = countryName;
            this.active = active;
        }

        /**
         * Checks if is active.
         *
         * @return true, if is active
         */
        public boolean isActive() {
            return active;
        }

        /**
         * Gets the 2-letter code.
         *
         * @return the 2-letter code
         */
        public String getCode2() {
            return code2;
        }

        /**
         * Gets the 3-letter code.
         *
         * @return the 3-letter code
         */
        public String getCode3() {
            return code3;
        }

        /**
         * Gets the numeric code.
         *
         * @return the numeric code
         */
        public String getCodeNum() {
            return codeNum;
        }

        /**
         * Gets the country name.
         *
         * @return the country name
         */
        public String getCountryName() {
            return countryName;
        }

        /**
         * Two records are equal when all of their fields are equal.
         */
        @Override
        public boolean equals(final Object other) {
            if (this == other) {
                return true;
            }
            if (!(other instanceof CountryInfo)) {
                return false;
            }
            final CountryInfo that = (CountryInfo) other;
            return active == that.active
                && Objects.equals(code2, that.code2)
                && Objects.equals(code3, that.code3)
                && Objects.equals(codeNum, that.codeNum)
                && Objects.equals(countryName, that.countryName);
        }

        @Override
        public int hashCode() {
            return Objects.hash(active, code2, code3, codeNum, countryName);
        }

        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder();
            sb.append("active=").append(active).append(" code3=").append(code3).append(" code2=").append(code2).append(" code#=").append(codeNum).append(" name=").append(
                countryName);
            return sb.toString();
        }
    }
}
/*
* Copyright 2017 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.worker;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.genesys.catalog.model.vocab.ControlledVocabulary;
import org.genesys.catalog.model.vocab.VocabularyTerm;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Builds ISO-3166 country code vocabularies from the country list supplied by
 * {@link DavrosCountrySource}. Nothing is persisted here; callers receive
 * in-memory {@link ControlledVocabulary} instances.
 *
 * @author Maxym Borodenko
 * @author Matija Obreza
 */
@Component
public class ISO3166VocabularyUpdater {

    /**
     * Identifier of the vocabulary of ISO 3166-1 alpha-2 (two-letter) country
     * codes. See https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
     */
    public static final UUID ISO3166_2ALPHA = UUID.fromString("3e39a73e-d1ed-40b0-9944-ac5795128686");

    /**
     * Identifier of the vocabulary of ISO 3166-1 alpha-3 (three-letter) country
     * codes. See https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3
     */
    public static final UUID ISO3166_3ALPHA = UUID.fromString("39a3d6a2-20e6-4fab-8bfe-acb1f9fe774c");

    /**
     * Identifier of the vocabulary of ISO 3166-1 numeric (three-digit) country
     * codes. See https://en.wikipedia.org/wiki/ISO_3166-1_numeric
     */
    public static final UUID ISO3166_NUMERIC = UUID.fromString("bd45f660-853f-4034-a434-ed50679579cc");

    /** The Constant LOG. */
    public static final Log LOG = LogFactory.getLog(ISO3166VocabularyUpdater.class);

    @Autowired
    private DavrosCountrySource davrosCountrySource;

    /**
     * Builds the current 3-letter code vocabulary without storing it.
     *
     * @return vocabulary of ISO-3166 3-letter country codes
     * @throws IOException if the country data cannot be fetched
     */
    public ControlledVocabulary getISO3166Alpha3Vocabulary() throws IOException {
        return createVocabulary("ISO-3166-3alpha", DavrosCountrySource.CountryInfo::getCode3);
    }

    /**
     * Builds the current 2-letter code vocabulary without storing it.
     *
     * @return vocabulary of ISO-3166 2-letter country codes
     * @throws IOException if the country data cannot be fetched
     */
    public ControlledVocabulary getISO3166Alpha2Vocabulary() throws IOException {
        return createVocabulary("ISO-3166-2alpha", DavrosCountrySource.CountryInfo::getCode2);
    }

    /**
     * Builds the current numeric code vocabulary without storing it.
     *
     * @return vocabulary of ISO-3166 numeric country codes
     * @throws IOException if the country data cannot be fetched
     */
    public ControlledVocabulary getISO3166NumericVocabulary() throws IOException {
        return createVocabulary("ISO-3166-numeric", DavrosCountrySource.CountryInfo::getCodeNum);
    }

    /**
     * Assembles a vocabulary whose terms are keyed by the code that
     * {@code toTerm} extracts from each country record. Records without a code
     * are skipped, the first record seen for a code wins, and terms end up
     * ordered by code. The version tag is today's date as {@code yyyy.MM.dd}.
     *
     * @param title the vocabulary title
     * @param toTerm extracts the term code from a country record
     * @return the assembled, unsaved vocabulary
     * @throws IOException if the country data cannot be fetched
     */
    protected ControlledVocabulary createVocabulary(final String title, final Function<DavrosCountrySource.CountryInfo, String> toTerm) throws IOException {
        final ControlledVocabulary vocabulary = new ControlledVocabulary();
        vocabulary.setTitle(title);
        vocabulary.setUrl(DavrosCountrySource.DAVROS_ISO3166_URL);
        vocabulary.setVersionTag(DateTimeFormatter.ofPattern("yyyy.MM.dd").format(LocalDate.now()));

        final Map<String, VocabularyTerm> firstTermByCode = new HashMap<>();
        for (final DavrosCountrySource.CountryInfo country : davrosCountrySource.fetchCountryData()) {
            final VocabularyTerm candidate = new VocabularyTerm();
            candidate.setCode(toTerm.apply(country));
            candidate.setTitle(country.getCountryName());

            // skip terms without codes
            final String code = candidate.getCode();
            if (code == null || code.length() == 0) {
                continue;
            }
            // keep only the first term registered for an ISO code
            firstTermByCode.putIfAbsent(code, candidate);
        }

        final ArrayList<VocabularyTerm> orderedTerms = new ArrayList<>(firstTermByCode.values());
        orderedTerms.sort((left, right) -> left.getCode().compareTo(right.getCode()));
        vocabulary.setTerms(orderedTerms);
        return vocabulary;
    }
}
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.worker;
import com.opencsv.CSVReader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
* The Class WIEWSVocabularySource.
*
* @author Maxym Borodenko
*/
@Component("wiewsVocabularySource")
public class WiewsVocabularySource {
public static final String WIEWS_EXPORT_URL = "http://www.fao.org/wiews-archive/export_c.zip";
public static final String[] WIEWS_EXPORT_C_HEADERS = { "INSTCODE", "ACRONYM", "ECPACRONYM", "FULL_NAME", "TYPE", "GENEBANK_LONG_TERM_COLLECTIONS", "BOTANICAL_GARDEN",
"GENEBANK_MEDIUM_TERM_COLLECTIONS", "GENEBANK_SHORT_TERM_COLLECTIONS", "STREET_POB", "CITY_STATE", "ZIP_CODE", "PHONE", "FAX", "EMAIL", "URL", "LATITUDE", "LONGITUDE",
"ALTITUDE", "UPDATED_ON", "V_INSTCODE", "ISO3" };
public final static int COL_INSTCODE = 0;
public final static int COL_ACRONYM = 1;
public final static int COL_ECPACRONYM = 2;
public final static int COL_FULL_NAME = 3;
public final static int COL_TYPE = 4;
public final static int COL_GENEBANK_LONG_TERM_COLLECTIONS = 5;
public final static int COL_BOTANICAL_GARDEN = 6;
public final static int COL_GENEBANK_MEDIUM_TERM_COLLECTIONS = 7;
public final static int COL_GENEBANK_SHORT_TERM_COLLECTIONS = 8;
public final static int COL_STREET_POB = 9;
public final static int COL_CITY_STATE = 10;
public final static int COL_ZIP_CODE = 11;
public final static int COL_PHONE = 12;
public final static int COL_FAX = 13;
public final static int COL_EMAIL = 14;
public final static int COL_URL = 15;
public final static int COL_LATITUDE = 16;
public final static int COL_LONGITUDE = 17;
public final static int COL_ALTITUDE = 18;
public final static int COL_UPDATED_ON = 19;
public final static int COL_V_INSTCODE = 20;
public final static int COL_ISO3 = 21;
public static final Logger LOG = LoggerFactory.getLogger(WiewsVocabularySource.class);
public List<WiewsInfo> fetchWiewsData() throws IOException {
final HttpGet httpget = new HttpGet(WIEWS_EXPORT_URL);
HttpResponse response = null;
final CloseableHttpClient httpclient = HttpClientBuilder.create().build();
try {
response = httpclient.execute(httpget);
LOG.debug(response.getStatusLine().toString());
final HttpEntity entity = response.getEntity();
if (entity == null) {
LOG.warn("No HttpEntity in response, bailing out");
return Collections.emptyList();
}
LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());
if (entity != null) {
ZipInputStream instream = null;
CSVReader reader = null;
try {
instream = new ZipInputStream(entity.getContent());
final ZipEntry zipEntry = instream.getNextEntry();
LOG.debug("Got entry: {}", zipEntry.getName());
if (!zipEntry.getName().equals("export_c.txt")) {
LOG.warn("Expected export_c, not {}", zipEntry.getName());
throw new IOException("Missing export_c");
}
final InputStreamReader inreader = new InputStreamReader(instream, "UTF-8");
reader = new CSVReader(inreader, ',', '"', false);
// Ensure headers match known format
final String[] headers = reader.readNext();
LOG.warn("export_c.txt headers: {}", ArrayUtils.toString(headers, "<null>"));
if (WIEWS_EXPORT_C_HEADERS.length != headers.length || !Arrays.equals(WIEWS_EXPORT_C_HEADERS, headers)) {
throw new WIEWSUpdateException("export_c.txt headers mismatch: " + ArrayUtils.toString(headers, "<null>"));
}
final List<WiewsInfo> dataToImport = new ArrayList<>();
String[] line = null;
long k = 0;
while ((line = reader.readNext()) != null) {
for (int i = 0; i < line.length; i++) {
if (line[i].equals("null") || StringUtils.isBlank(line[i])) {
line[i] = null;
}
}
dataToImport.add(getWiewsInstance(line));
k++;
if (k % 1000 == 1) {
LOG.info("Read {} lines", k);
}
}
reader.close();
return dataToImport.stream().sorted(Comparator.comparing(a -> a.instcode)).collect(Collectors.toList());
} catch (final RuntimeException ex) {
LOG.error(ex.getMessage(), ex);
httpget.abort();
} catch (final WIEWSUpdateException e) {
throw new IOException(e);
} finally {
IOUtils.closeQuietly(reader);
IOUtils.closeQuietly(instream);
}
}
LOG.info("Done reading WIEWS data");