Commit 6c0a31a7 authored by Matija Obreza
Browse files

Updated SGSVUpdater

parent 7e5bd645
......@@ -72,6 +72,53 @@ class SGSVEntry implements AccessionIdentifier3 {
/**
 * Returns a short description of this entry made of the institute code, the
 * accession number and the genus.
 */
@Override
public String toString() {
    // Only one return may remain: a second return statement directly after the
    // first is unreachable code and does not compile.
    return "SGSVEntry " + instCode + " " + acceNumb + " " + genus;
}
/**
 * Folds the hash codes of {@code instCode}, {@code acceNumb} and {@code genus}
 * together with a multiplier of 31; a {@code null} field contributes 0.
 *
 * @see java.lang.Object#hashCode()
 */
@Override
public int hashCode() {
    // Seed of 1 times 31 is folded into the first step.
    int hash = 31 + (instCode == null ? 0 : instCode.hashCode());
    hash = 31 * hash + (acceNumb == null ? 0 : acceNumb.hashCode());
    hash = 31 * hash + (genus == null ? 0 : genus.hashCode());
    return hash;
}
/**
 * Two entries are equal when they are of exactly the same class and their
 * {@code instCode}, {@code acceNumb} and {@code genus} fields are pairwise
 * equal; a {@code null} field only matches another {@code null}.
 *
 * @see java.lang.Object#equals(java.lang.Object)
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final SGSVEntry that = (SGSVEntry) obj;
    // Fields are compared in the order instCode, acceNumb, genus; evaluation
    // stops at the first mismatch.
    return (instCode == null ? that.instCode == null : instCode.equals(that.instCode))
        && (acceNumb == null ? that.acceNumb == null : acceNumb.equals(that.acceNumb))
        && (genus == null ? that.genus == null : genus.equals(that.genus));
}
}
......@@ -23,6 +23,9 @@ import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import com.opencsv.CSVReader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
......@@ -42,28 +45,23 @@ import org.genesys2.server.service.GenesysService;
import org.hibernate.exception.ConstraintViolationException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.task.TaskExecutor;
import org.springframework.orm.ObjectOptimisticLockingFailureException;
import org.springframework.orm.hibernate3.HibernateOptimisticLockingFailureException;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Component;
import com.opencsv.CSVReader;
@Component
public class SGSVUpdate {
private static final String SGSV_DOWNLOAD_URL = "http://www.nordgen.org/sgsv/download.php?file=/scope/sgsv/files/sgsv_templates.tab";
static final String[] SGSV_HEADERS = { "sgsv_id", "institute_code", "deposit_box_number", "collection_name", "accession_number", "full_scientific_name",
"country_of_collection_or_source", "number_of_seeds", "regeneration_month_and_year", "other_accession_designations", "provider_institute_code",
"accession_url", "country_code", "country_name", "continent_name", "seeds", "genus", "species_epithet", "species", "taxon_name", "date_of_deposit",
"date_of_dataset", "sgsv_template_id", "box_id", "sgsv_taxon_id", "taxon_authority", "infraspesific_epithet", "vernacular_name", "itis_tsn",
"sgsv_genus_id", "accession_name" };
"country_of_collection_or_source", "number_of_seeds", "regeneration_month_and_year", "other_accession_designations", "provider_institute_code", "accession_url",
"country_code", "country_name", "continent_name", "seeds", "genus", "species_epithet", "species", "taxon_name", "date_of_deposit", "date_of_dataset",
"sgsv_template_id", "box_id", "sgsv_taxon_id", "taxon_authority", "infraspesific_epithet", "vernacular_name", "itis_tsn", "sgsv_genus_id", "accession_name" };
public static final Log LOG = LogFactory.getLog(SGSVUpdate.class);
private static final int BATCH_SIZE = 50;
@Autowired
private HttpClientBuilder httpClientBuilder;
@Autowired
private TaskExecutor taskExecutor;
......@@ -79,7 +77,7 @@ public class SGSVUpdate {
final HttpGet httpget = new HttpGet(SGSV_DOWNLOAD_URL);
HttpResponse response = null;
final CloseableHttpClient httpclient = httpClientBuilder.build();
final CloseableHttpClient httpclient = HttpClientBuilder.create().build();
try {
response = httpclient.execute(httpget);
......@@ -200,8 +198,12 @@ public class SGSVUpdate {
// EXIT
return;
} catch (final HibernateOptimisticLockingFailureException e) {
} catch (final ObjectOptimisticLockingFailureException e) {
LOG.warn("Failed to save data, will retry. " + e.getMessage());
try {
Thread.sleep((long) (50 * Math.random()));
} catch (InterruptedException e1) {
}
continue;
}
}
......@@ -213,17 +215,15 @@ public class SGSVUpdate {
bulk.clear();
}
List<SGSVEntry> bulkToList(ArrayList<String[]> bulkCopy) {
final List<SGSVEntry> entries = new ArrayList<SGSVEntry>(bulkCopy.size());
// Extract INSTCODE and ACCENUMB
for (final String[] entry : bulkCopy) {
List<SGSVEntry> bulkToList(List<String[]> bulkCopy) {
return bulkCopy.stream().map(row -> {
try {
entries.add(new SGSVEntry(entry));
return new SGSVEntry(row);
} catch (ArrayIndexOutOfBoundsException | NumberFormatException e) {
LOG.warn("Invalid entry: " + ArrayUtils.toString(entry, "NULL"));
LOG.warn("Invalid entry: " + ArrayUtils.toString(row, "NULL"));
return null;
}
}
return entries;
}).filter(entry -> entry!=null && entry.acceNumb != null).distinct().collect(Collectors.toList());
}
void updateSvalbards(List<SGSVEntry> accns) {
......@@ -299,7 +299,7 @@ public class SGSVUpdate {
try {
LOG.info("Saving svalbards size=" + svalbards.size());
genesysService.saveSvalbards(svalbards);
} catch (final ConstraintViolationException e) {
} catch (final ConstraintViolationException | ObjectOptimisticLockingFailureException e) {
LOG.warn(e.getMessage());
for (final Accession a : matching) {
LOG.warn("\t" + a);
......
/*
* Copyright 2016 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys2.server.service.worker;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Manual check of SGSV data, ignored by default.
 *
 * <p>
 * Notes on SGSV data:
 * </p>
 *
 * <ul>
 * <li>INSTCODE, ACCENUMB are not unique in SGSV. See USA996 accession PI89009 or COL003:G10276</li>
 * </ul>
 *
 * @author mobreza
 */
@Ignore
public class SGSVDumpTest {

    /**
     * Runs {@code updateSGSV()} on an {@link SGSVUpdate} whose {@code workIt} is
     * overridden to convert each batch with {@code bulkToList} and warn about
     * every entry that has a blank {@code acceNumb}.
     */
    @Test
    public void download() {
        final SGSVUpdate updater = new SGSVUpdate() {
            @Override
            void workIt(List<String[]> bulk) {
                LOG.trace("Queueing job size=" + bulk.size());
                // A duplicate check keyed on instCode:acceNumb:genus was sketched
                // here earlier but is not active.
                bulkToList(bulk).stream()
                    .filter(entry -> StringUtils.isBlank(entry.acceNumb))
                    .forEach(entry -> LOG.warn("No ACCENUMB for entry=" + entry));
            }
        };
        updater.updateSGSV();
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment