Commit 6ab63143 authored by Matija Obreza
Browse files

Merge branch '129-fao-wiews-code-updater' into 'master'

Resolve "FAO WIEWS code updater"

Closes #129

See merge request !126
parents d58f42a2 94b1b1cd
......@@ -200,7 +200,7 @@ public class VocabularyServiceImpl implements VocabularyService {
updateTerms(input);
} else if (input != null && input.getTerms() != null) {
LOG.info("Matching against {} existing terms: {}", existing.size(), existing);
LOG.info("Matching against {} existing terms", existing.size());
// match existing codes
input.getTerms().forEach(inputTerm -> {
......
......@@ -46,19 +46,19 @@ public class ISO3166VocabularyUpdater {
* ISO 3166-1 alpha-2 representation of names of countries and their subdivisions, contains two-letter country codes
* https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
*/
public static UUID ISO3166_2ALPHA = UUID.fromString("3e39a73e-d1ed-40b0-9944-ac5795128686");
public static final UUID ISO3166_2ALPHA = UUID.fromString("3e39a73e-d1ed-40b0-9944-ac5795128686");
/**
* ISO 3166-1 alpha-3 representation of names of countries and their subdivisions, contains three-letter country codes
* https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3
*/
public static UUID ISO3166_3ALPHA = UUID.fromString("39a3d6a2-20e6-4fab-8bfe-acb1f9fe774c");
public static final UUID ISO3166_3ALPHA = UUID.fromString("39a3d6a2-20e6-4fab-8bfe-acb1f9fe774c");
/**
* ISO 3166-1 numeric representation of names of countries and their subdivisions, contains three-digit country codes
* https://en.wikipedia.org/wiki/ISO_3166-1_numeric
*/
public static UUID ISO3166_NUMERIC = UUID.fromString("bd45f660-853f-4034-a434-ed50679579cc");
public static final UUID ISO3166_NUMERIC = UUID.fromString("bd45f660-853f-4034-a434-ed50679579cc");
/** The Constant LOG. */
public static final Log LOG = LogFactory.getLog(ISO3166VocabularyUpdater.class);
......
......@@ -45,7 +45,7 @@ public class ISO639VocabularyUpdater {
* ISO 639 representation of names for languages and language groups.
* https://en.wikipedia.org/wiki/ISO_639
*/
public static UUID ISO639_3 = UUID.fromString("21b10067-ba15-44dd-867f-6a18a117fee8");
public static final UUID ISO639_3 = UUID.fromString("21b10067-ba15-44dd-867f-6a18a117fee8");
/** The Constant LOG. */
public static final Log LOG = LogFactory.getLog(ISO639VocabularyUpdater.class);
......
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.worker;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import au.com.bytecode.opencsv.CSVReader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
/**
 * Downloads the zipped FAO WIEWS export from {@link #WIEWS_EXPORT_URL}, validates the header row of
 * {@code export_c.txt} and converts every data row to a {@link WiewsInfo}.
 *
 * @author Maxym Borodenko
 */
@Component
public class WiewsVocabularySource {

    /** Location of the zipped WIEWS export. */
    public static final String WIEWS_EXPORT_URL = "http://www.fao.org/wiews-archive/export_c.zip";

    /** Header row that {@code export_c.txt} must start with, in this exact order. */
    public static final String[] WIEWS_EXPORT_C_HEADERS = { "INSTCODE", "ACRONYM", "ECPACRONYM", "FULL_NAME", "TYPE", "GENEBANK_LONG_TERM_COLLECTIONS",
        "BOTANICAL_GARDEN", "GENEBANK_MEDIUM_TERM_COLLECTIONS", "GENEBANK_SHORT_TERM_COLLECTIONS", "STREET_POB", "CITY_STATE", "ZIP_CODE", "PHONE", "FAX",
        "EMAIL", "URL", "LATITUDE", "LONGITUDE", "ALTITUDE", "UPDATED_ON", "V_INSTCODE", "ISO3" };

    // Zero-based column positions within a data row; they mirror the order of WIEWS_EXPORT_C_HEADERS.
    public static final int COL_INSTCODE = 0;
    public static final int COL_ACRONYM = 1;
    public static final int COL_ECPACRONYM = 2;
    public static final int COL_FULL_NAME = 3;
    public static final int COL_TYPE = 4;
    public static final int COL_GENEBANK_LONG_TERM_COLLECTIONS = 5;
    public static final int COL_BOTANICAL_GARDEN = 6;
    public static final int COL_GENEBANK_MEDIUM_TERM_COLLECTIONS = 7;
    public static final int COL_GENEBANK_SHORT_TERM_COLLECTIONS = 8;
    public static final int COL_STREET_POB = 9;
    public static final int COL_CITY_STATE = 10;
    public static final int COL_ZIP_CODE = 11;
    public static final int COL_PHONE = 12;
    public static final int COL_FAX = 13;
    public static final int COL_EMAIL = 14;
    public static final int COL_URL = 15;
    public static final int COL_LATITUDE = 16;
    public static final int COL_LONGITUDE = 17;
    public static final int COL_ALTITUDE = 18;
    public static final int COL_UPDATED_ON = 19;
    public static final int COL_V_INSTCODE = 20;
    public static final int COL_ISO3 = 21;

    /** The Constant LOG. */
    public static final Logger LOG = LoggerFactory.getLogger(WiewsVocabularySource.class);

    /**
     * Downloads and parses the WIEWS export.
     *
     * @return the parsed rows sorted by institute code (rows without a code first); an empty list when the
     *         response has no entity or when an unexpected {@link RuntimeException} occurs while reading
     *         (the exception is logged and the request aborted)
     * @throws IOException if the download fails, the archive does not start with {@code export_c.txt},
     *         or the header row does not match {@link #WIEWS_EXPORT_C_HEADERS}
     */
    public List<WiewsInfo> fetchWiewsData() throws IOException {
        final HttpGet httpget = new HttpGet(WIEWS_EXPORT_URL);
        final CloseableHttpClient httpclient = HttpClientBuilder.create().build();
        try {
            final HttpResponse response = httpclient.execute(httpget);
            LOG.debug(response.getStatusLine().toString());

            final HttpEntity entity = response.getEntity();
            if (entity == null) {
                LOG.warn("No HttpEntity in response, bailing out");
                return Collections.emptyList();
            }
            LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());

            ZipInputStream instream = null;
            CSVReader reader = null;
            try {
                instream = new ZipInputStream(entity.getContent());
                final ZipEntry zipEntry = instream.getNextEntry();
                // getNextEntry() returns null when the stream holds no (further) entries, e.g. when the
                // server answered with something that is not a ZIP archive.
                if (zipEntry == null) {
                    LOG.warn("Expected export_c, but the archive contains no entries");
                    throw new IOException("Missing export_c");
                }
                LOG.debug("Got entry: {}", zipEntry.getName());
                if (!zipEntry.getName().equals("export_c.txt")) {
                    LOG.warn("Expected export_c, not {}", zipEntry.getName());
                    throw new IOException("Missing export_c");
                }

                reader = new CSVReader(new InputStreamReader(instream, "UTF-8"), ',', '"', false);
                return readInstitutes(reader);
            } catch (final RuntimeException ex) {
                // Best effort: log, abort the request before the streams are closed, and fall through
                // to the empty result below.
                LOG.error(ex.getMessage(), ex);
                httpget.abort();
            } catch (final WIEWSUpdateException e) {
                throw new IOException(e);
            } finally {
                IOUtils.closeQuietly(reader);
                IOUtils.closeQuietly(instream);
            }
            LOG.info("Done reading WIEWS data");
        } finally {
            IOUtils.closeQuietly(httpclient);
        }
        return Collections.emptyList();
    }

    /**
     * Validates the header row and reads all remaining rows from the CSV reader.
     *
     * @param reader CSV reader positioned at the header row of {@code export_c.txt}
     * @return parsed rows sorted by institute code, rows without a code first
     * @throws IOException if reading from the underlying stream fails
     * @throws WIEWSUpdateException if the header row is missing or differs from {@link #WIEWS_EXPORT_C_HEADERS}
     */
    private static List<WiewsInfo> readInstitutes(final CSVReader reader) throws IOException, WIEWSUpdateException {
        // Ensure headers match known format
        final String[] headers = reader.readNext();
        LOG.warn("export_c.txt headers: {}", ArrayUtils.toString(headers, "<null>"));
        // Arrays.equals compares lengths too and is false for a null header row (empty file).
        if (!Arrays.equals(WIEWS_EXPORT_C_HEADERS, headers)) {
            throw new WIEWSUpdateException("export_c.txt headers mismatch: " + ArrayUtils.toString(headers, "<null>"));
        }

        final List<WiewsInfo> dataToImport = new ArrayList<>();
        String[] line;
        long k = 0;
        while ((line = reader.readNext()) != null) {
            // Normalize the literal text "null" and blank cells to a real null
            for (int i = 0; i < line.length; i++) {
                if ("null".equals(line[i]) || StringUtils.isBlank(line[i])) {
                    line[i] = null;
                }
            }
            dataToImport.add(getWiewsInstance(line));
            k++;
            if (k % 1000 == 1) {
                LOG.info("Read {} lines", k);
            }
        }

        // INSTCODE may have been nulled above; a plain comparing(...) would throw a NullPointerException
        // on such a row and discard the entire download.
        final Comparator<String> nullSafeCode = Comparator.nullsFirst(Comparator.naturalOrder());
        dataToImport.sort(Comparator.comparing(WiewsInfo::getInstcode, nullSafeCode));
        return dataToImport;
    }

    /**
     * Builds a {@link WiewsInfo} from one data row of the export.
     *
     * @param values row cells indexed by the {@code COL_*} constants; must have at least
     *        {@code COL_ISO3 + 1} elements, otherwise an {@link ArrayIndexOutOfBoundsException} is thrown
     * @return the WIEWS record for the row
     */
    public static WiewsInfo getWiewsInstance(final String[] values) {
        return new WiewsInfo(values[COL_INSTCODE], values[COL_ACRONYM], values[COL_FULL_NAME], values[COL_EMAIL], values[COL_URL],
            values[COL_STREET_POB], values[COL_ZIP_CODE], values[COL_CITY_STATE], values[COL_ISO3], values[COL_ECPACRONYM]);
    }

    /**
     * Immutable holder for the subset of WIEWS export columns used by this module.
     * Any field may be {@code null} when the corresponding cell was blank.
     */
    public static class WiewsInfo {
        private final String instcode;
        private final String acronym;
        private final String fullName;
        private final String email;
        private final String url;
        private final String streetPob;
        private final String zipCode;
        private final String cityState;
        private final String iso3;
        private final String ecpaacronym;

        /**
         * Instantiates a new WIEWS info.
         *
         * @param instcode the instcode
         * @param acronym the acronym
         * @param fullName the fullName
         * @param email the email
         * @param url the url
         * @param streetPob the streetPob
         * @param zipCode the zipCode
         * @param cityState the cityState
         * @param iso3 the iso3
         * @param ecpaacronym the ecpaacronym
         */
        public WiewsInfo(final String instcode, final String acronym, final String fullName, final String email, final String url,
            final String streetPob, final String zipCode, final String cityState, final String iso3, final String ecpaacronym) {
            this.instcode = instcode;
            this.acronym = acronym;
            this.fullName = fullName;
            this.email = email;
            this.url = url;
            this.streetPob = streetPob;
            this.zipCode = zipCode;
            this.cityState = cityState;
            this.iso3 = iso3;
            this.ecpaacronym = ecpaacronym;
        }

        /**
         * Gets the instcode.
         *
         * @return the instcode
         */
        public String getInstcode() {
            return instcode;
        }

        /**
         * Gets the acronym.
         *
         * @return the acronym
         */
        public String getAcronym() {
            return acronym;
        }

        /**
         * Gets the full name.
         *
         * @return the fullName
         */
        public String getFullName() {
            return fullName;
        }

        /**
         * Gets the email.
         *
         * @return the email
         */
        public String getEmail() {
            return email;
        }

        /**
         * Gets the url.
         *
         * @return the url
         */
        public String getUrl() {
            return url;
        }

        /**
         * Gets the streetPob.
         *
         * @return the streetPob
         */
        public String getStreetPob() {
            return streetPob;
        }

        /**
         * Gets the zip code.
         *
         * @return the zipCode
         */
        public String getZipCode() {
            return zipCode;
        }

        /**
         * Gets the city state.
         *
         * @return the cityState
         */
        public String getCityState() {
            return cityState;
        }

        /**
         * Gets the iso3.
         *
         * @return the iso3
         */
        public String getIso3() {
            return iso3;
        }

        /**
         * Gets the ecpaacronym.
         *
         * @return the ecpaacronym
         */
        public String getEcpaacronym() {
            return ecpaacronym;
        }

        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder();
            sb.append("instcode=").append(instcode).append(" acronym=").append(acronym).append(" fullName=").append(fullName).append(" email=").append(email).append(" url=").append(url)
                .append(" streetPob=").append(streetPob).append(" zipCode=").append(zipCode).append(" cityState=").append(cityState).append(" iso3=").append(iso3).append(" ecpaacronym=").append(ecpaacronym);
            return sb.toString();
        }
    }

    /**
     * Signals that the downloaded export does not have the expected format.
     */
    public static class WIEWSUpdateException extends Exception {
        private static final long serialVersionUID = 5390173913753279157L;

        public WIEWSUpdateException(String message) {
            super(message);
        }
    }
}
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.worker;
import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.genesys.catalog.model.vocab.ControlledVocabulary;
import org.genesys.catalog.model.vocab.VocabularyTerm;
import org.springframework.beans.factory.annotation.Autowired;
import org.genesys.catalog.service.worker.WiewsVocabularySource.WiewsInfo;
import org.springframework.stereotype.Component;
/**
 * Builds the FAO WIEWS {@link ControlledVocabulary} from the rows supplied by {@link WiewsVocabularySource}.
 *
 * @author Maxym Borodenko
 */
@Component
public class WiewsVocabularyUpdater {

    /** Fixed UUID for FAO WIEWS; declared final like the UUID constants of the ISO vocabulary updaters. */
    public static final UUID FAO_WIEWS_UUID = UUID.fromString("36b4a674-e2eb-4ba1-a05a-71cfc2af862e");

    /** The Constant LOG. */
    public static final Log LOG = LogFactory.getLog(WiewsVocabularyUpdater.class);

    /** Pattern of the version tag; DateTimeFormatter is immutable, so one shared instance is enough. */
    private static final DateTimeFormatter VERSION_TAG_FORMAT = DateTimeFormatter.ofPattern("yyyy.MM.dd");

    @Autowired
    private WiewsVocabularySource institutionSource;

    /**
     * Generates a current FAO WIEWS {@link ControlledVocabulary} but doesn't persist
     * it to storage.
     *
     * @return vocabulary of FAO WIEWS codes
     * @throws IOException IOException
     */
    public ControlledVocabulary getWiewsVocabulary() throws IOException {
        return createVocabulary("FAO WIEWS", WiewsInfo::getInstcode);
    }

    /**
     * Creates a vocabulary with one term per distinct, non-empty code. When several rows yield the same
     * code, the first one returned by the source wins. Terms are ordered by code and the version tag is
     * today's date formatted as {@code yyyy.MM.dd}.
     *
     * @param title the title
     * @param toTerm extracts the term code from a WIEWS record
     * @return the controlled vocabulary
     * @throws IOException Signals that an I/O exception has occurred.
     */
    protected ControlledVocabulary createVocabulary(final String title, final Function<WiewsInfo, String> toTerm) throws IOException {
        final ControlledVocabulary vocabulary = new ControlledVocabulary();
        vocabulary.setTitle(title);
        vocabulary.setUrl(WiewsVocabularySource.WIEWS_EXPORT_URL);
        vocabulary.setVersionTag(VERSION_TAG_FORMAT.format(LocalDate.now()));

        final Map<String, VocabularyTerm> assignedCodes = institutionSource.fetchWiewsData().stream()
            .map(wiewsInfo -> {
                final VocabularyTerm term = new VocabularyTerm();
                term.setCode(toTerm.apply(wiewsInfo));
                term.setTitle(getFirstExistingField(wiewsInfo));
                term.setDescription(generateMarkdownForDescription(wiewsInfo));
                return term;
            })
            // remove terms without codes
            .filter(term -> term.getCode() != null && !term.getCode().isEmpty())
            // keep only the first term seen for each code
            .collect(Collectors.toMap(VocabularyTerm::getCode, Function.identity(), (first, duplicate) -> first, HashMap::new));

        final ArrayList<VocabularyTerm> sortedTerms = assignedCodes.values().stream()
            .sorted(Comparator.comparing(VocabularyTerm::getCode))
            .collect(Collectors.toCollection(ArrayList::new));
        vocabulary.setTerms(sortedTerms);
        return vocabulary;
    }

    /**
     * Pick first one that exists in this order: FULL_NAME, ACRONYM, ECPAACRONYM, INSTCODE
     *
     * @param wiewsInfo WIEWS data
     * @return the title for controlled vocabulary term
     */
    private String getFirstExistingField(final WiewsInfo wiewsInfo) {
        if (wiewsInfo.getFullName() != null) {
            return wiewsInfo.getFullName();
        }
        if (wiewsInfo.getAcronym() != null) {
            return wiewsInfo.getAcronym();
        }
        if (wiewsInfo.getEcpaacronym() != null) {
            return wiewsInfo.getEcpaacronym();
        }
        return wiewsInfo.getInstcode();
    }

    /**
     * Generate markdown for description of controlled vocabulary term.
     * Layout: acronym and bold full name, blank line, e-mail and URL, blank line, street, zip code and
     * city, ISO3 country code. Missing parts are left out.
     *
     * @param wiewsInfo WIEWS data
     * @return the description for controlled vocabulary term
     */
    private String generateMarkdownForDescription(final WiewsInfo wiewsInfo) {
        // Two trailing spaces before the newline: a hard line break in markdown
        final String breakLine = "  \n";

        final String line1 = ((wiewsInfo.getAcronym() != null ? wiewsInfo.getAcronym() : "") + (wiewsInfo.getFullName() != null ? " **" + wiewsInfo.getFullName() + "**" : "")).trim();
        final String line2 = wiewsInfo.getEmail() != null ? wiewsInfo.getEmail() : "";
        final String line3 = wiewsInfo.getUrl() != null ? wiewsInfo.getUrl() : "";
        final String line4 = wiewsInfo.getStreetPob() != null ? wiewsInfo.getStreetPob() : "";
        final String line5 = ((wiewsInfo.getZipCode() != null ? wiewsInfo.getZipCode() : "") + (wiewsInfo.getCityState() != null ? " " + wiewsInfo.getCityState() : "")).trim();
        final String line6 = wiewsInfo.getIso3() != null ? wiewsInfo.getIso3() : "";

        final StringBuilder builder = new StringBuilder();
        if (!line1.isEmpty()) {
            builder.append(line1).append(breakLine).append(breakLine);
        }
        if (!line2.isEmpty()) {
            builder.append(line2).append(breakLine);
        }
        if (!line3.isEmpty()) {
            builder.append(line3).append(breakLine);
        }
        // Separate the contact lines from the address only when at least one contact line was written
        if (!line2.isEmpty() || !line3.isEmpty()) {
            builder.append(breakLine);
        }
        if (!line4.isEmpty()) {
            builder.append(line4).append(breakLine);
        }
        if (!line5.isEmpty()) {
            builder.append(line5).append(breakLine);
        }
        if (!line6.isEmpty()) {
            builder.append(line6);
        }
        return builder.toString();
    }
}
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.server.controller.api.v0;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
import org.genesys.catalog.model.vocab.VocabularyTerm;
import org.genesys.catalog.service.VocabularyService;
import org.genesys.catalog.service.worker.WiewsVocabularyUpdater;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
/**
* The Class WiewsController.
*
* @author Maxym Borodenko
*/
@RestController
@RequestMapping(WiewsController.API_BASE)
@PreAuthorize("isAuthenticated()")
public class WiewsController {
public static final String API_BASE = "/api/v0/wiews";
public static final UUID FAO_WIEWS_UUID = WiewsVocabularyUpdater.FAO_WIEWS_UUID;
private static final Logger LOG = LoggerFactory.getLogger(WiewsController.class);
@Autowired
private VocabularyService vocabularyService;
@Autowired
private WiewsVocabularyUpdater wiewsVocabularyUpdater;
@PreAuthorize("hasRole('ADMINISTRATOR')")