Commit c757d1c7 authored by Maxym Borodenko's avatar Maxym Borodenko Committed by Matija Obreza
Browse files

Geonames for administrative units

parent 4fb03a63
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service;
import org.genesys.common.model.Geoname;
import java.util.List;
/**
* Service for updating {@link Geoname} records in the db.
*
* @author Maxym Borodenko
*/
public interface GeonamesService {
/**
* Update geonames.
*
* @param list list of geonames to be updated in the db
* @throws Exception when list of geonames not saved
*/
void update(List<Geoname> list) throws Exception;
}
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.impl;
import org.genesys.catalog.service.GeonamesService;
import org.genesys.common.model.Geoname;
import org.genesys.common.persistence.GeonameRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
/**
* {@link GeonamesService} implementation that hands the records over to
* {@link GeonameRepository}.
*
* The class is annotated as read-only transactional; {@link #update(List)}
* carries its own {@code @Transactional} annotation without the read-only flag.
*
* @author Maxym Borodenko
*/
@Service
@Transactional(readOnly = true)
public class GeonamesServiceImpl implements GeonamesService {
// Not referenced anywhere in this class at the moment.
private static final Logger LOG = LoggerFactory.getLogger(GeonamesServiceImpl.class);
@Autowired
private GeonameRepository geonameRepository;
/**
* Passes the whole list to {@code geonameRepository.bulkSave(list)}.
*
* @param list list of geonames to be updated in the db
* @throws Exception declared by the interface; any exception raised by the repository call propagates
*/
@Override
@Transactional
public void update(final List<Geoname> list) throws Exception {
geonameRepository.bulkSave(list);
}
}
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.catalog.service.worker;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.genesys.catalog.service.GeonamesService;
import org.genesys.common.model.Geoname;
import org.genesys.common.persistence.GeonameRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Component;
/**
 * Downloads the geonames.org dump archive, unpacks it into a temporary
 * directory and imports its tab-separated records as {@link Geoname} entities
 * in batches of {@link #BATCH_SIZE}.
 *
 * Only one update runs at a time: a request made while an update is queued or
 * in progress is ignored.
 *
 * @author Maxym Borodenko
 */
@Component
public class GeonamesUpdater {

    /** Number of records handed to {@link GeonamesService#update(List)} in one call. */
    private static final int BATCH_SIZE = 500;

    private static final Logger LOG = LoggerFactory.getLogger(GeonamesUpdater.class);

    /** Name of the archive entry that holds the records to import. */
    public static final String DUMP_FILE_NAME = "allCountries.txt";

    // A smaller single-country archive exists at http://download.geonames.org/export/dump/UA.zip;
    // DUMP_FILE_NAME must match the entry inside whichever archive is used.
    public static final String GEONAMES_DUMP_URL = "http://download.geonames.org/export/dump/allCountries.zip";

    /** Directory (relative to the working directory) the archive is unpacked into; deleted after each run. */
    private static final String TEMP_DIR = "temp";

    /** Guarded by the class monitor: read and written only through the static synchronized helpers. */
    private static boolean RUNNING = false;

    // Zero-based column indexes of one tab-separated line of the dump file.
    public static final int GEONAME_ID = 0;
    public static final int NAME = 1;
    public static final int ASCII_NAME = 2;
    public static final int ALTERNATE_NAMES = 3;
    public static final int LATITUDE = 4;
    public static final int LONGITUDE = 5;
    public static final int FEATURE_CLASS = 6;
    public static final int FEATURE_CODE = 7;
    public static final int COUNTRY_CODE = 8;
    public static final int CC2 = 9;
    public static final int ADMIN1_CODE = 10;
    public static final int ADMIN2_CODE = 11;
    public static final int ADMIN3_CODE = 12;
    public static final int ADMIN4_CODE = 13;
    public static final int POPULATION = 14;
    public static final int ELEVATION = 15;
    public static final int DEM = 16;
    public static final int TIMEZONE = 17;
    public static final int MODIFICATION_DATE = 18;

    /** Number of columns a line must have for every index above to be readable. */
    private static final int COLUMN_COUNT = MODIFICATION_DATE + 1;

    /** Pattern of the modification date column: four-digit year, month, day. */
    private static final String MODIFICATION_DATE_PATTERN = "yyyy-MM-dd";

    @Autowired
    private GeonameRepository geonameRepository;

    @Autowired
    private GeonamesService geonamesService;

    /** Single worker thread, so imports never overlap. */
    private final ExecutorService executor = Executors.newFixedThreadPool(1);

    /**
     * Update local Geonames with data from geonames.org.
     *
     * The work is submitted to a background thread and this method returns
     * immediately. If an update is already queued or running, the request is
     * ignored.
     */
    @PreAuthorize("hasRole('ADMINISTRATOR')")
    public void updateGeonames() {
        // Claim the flag before queueing: checking it inside the task is pointless with a
        // single worker thread, because a queued task only starts after the previous one ended.
        if (!tryMarkRunning()) {
            LOG.warn("Geonames update is already running, ignoring request.");
            return;
        }
        try {
            executor.submit(() -> {
                try {
                    downloadUnpackAndImportGeonames();
                } catch (final IOException e) {
                    LOG.error(e.getMessage(), e);
                } finally {
                    markStopped();
                }
            });
        } catch (final RuntimeException e) {
            // The task was never queued, so nothing else would release the flag.
            markStopped();
            throw e;
        }
    }

    /**
     * Downloads the archive from {@link #GEONAMES_DUMP_URL}, unpacks it into
     * {@link #TEMP_DIR} and imports the {@link #DUMP_FILE_NAME} entry. The
     * temporary directory is removed afterwards, whether or not the import
     * succeeded.
     *
     * @throws IOException when download, unpacking or import fails for any reason
     */
    private void downloadUnpackAndImportGeonames() throws IOException {
        LOG.warn("Downloading geonames data from {}", GEONAMES_DUMP_URL);
        final File tempDir = new File(TEMP_DIR);
        final CloseableHttpClient httpclient = HttpClientBuilder.create().build();
        try {
            final HttpResponse response = httpclient.execute(new HttpGet(GEONAMES_DUMP_URL));
            final int status = response.getStatusLine().getStatusCode();
            if (status != 200) {
                throw new IOException("Unexpected HTTP status " + status + " from " + GEONAMES_DUMP_URL);
            }
            final HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Empty response from " + GEONAMES_DUMP_URL);
            }

            final File dumpFile;
            try (InputStream instream = entity.getContent(); ZipInputStream zis = new ZipInputStream(instream)) {
                dumpFile = unpack(zis, tempDir);
            }

            if (dumpFile == null) {
                LOG.warn("Expected file {} was not found.", DUMP_FILE_NAME);
                throw new IOException("Missing file " + DUMP_FILE_NAME);
            }
            importGeonames(dumpFile);
        } catch (final Throwable e) {
            // This runs on the executor and the returned Future is discarded,
            // so anything not logged here would disappear without a trace.
            LOG.error("Geonames download and unpack failed to complete.", e);
            throw new IOException(e);
        } finally {
            IOUtils.closeQuietly(httpclient);
            FileUtils.deleteQuietly(tempDir);
        }
    }

    /**
     * Writes every file entry of the archive below {@code targetDir}.
     *
     * @param zis the archive stream, positioned before its first entry
     * @param targetDir directory the entries are written into
     * @return the unpacked {@link #DUMP_FILE_NAME} file, or {@code null} when the archive has no such entry
     * @throws IOException when writing fails or an entry would be written outside {@code targetDir}
     */
    private File unpack(final ZipInputStream zis, final File targetDir) throws IOException {
        final String targetRoot = targetDir.getCanonicalPath() + File.separator;
        final byte[] buffer = new byte[8192];
        File dumpFile = null;

        ZipEntry ze;
        while ((ze = zis.getNextEntry()) != null) {
            final String fileName = ze.getName();
            final File newFile = new File(targetDir, fileName);

            // Entry names come from a remote archive; refuse names such as "../x" that escape targetDir.
            if (!newFile.getCanonicalPath().startsWith(targetRoot)) {
                throw new IOException("Zip entry " + fileName + " resolves outside of " + targetDir.getAbsolutePath());
            }

            if (ze.isDirectory()) {
                // Directory entries carry no data; opening one as a file would fail.
                newFile.mkdirs();
                zis.closeEntry();
                continue;
            }

            LOG.warn("Unpacking {} file to {}", fileName, newFile.getAbsolutePath());
            if (fileName.equals(DUMP_FILE_NAME)) {
                dumpFile = newFile;
            }

            // Create missing parent directories for entries nested in sub directories.
            final File parent = newFile.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }

            try (FileOutputStream fos = new FileOutputStream(newFile)) {
                int len;
                while ((len = zis.read(buffer)) > 0) {
                    fos.write(buffer, 0, len);
                }
            }
            LOG.warn("File unpack completed to {}", newFile.getAbsolutePath());
            zis.closeEntry();
        }
        return dumpFile;
    }

    /**
     * Reads the unpacked dump line by line, converts every line into a
     * {@link Geoname} (reusing the stored entity when one with the same id is
     * found) and saves the records in batches.
     *
     * Lines with too few columns or an unparsable number are logged and skipped.
     *
     * @param unpackedFile the tab-separated dump file
     * @throws IOException when the file cannot be opened or read completely
     */
    private void importGeonames(final File unpackedFile) throws IOException {
        LOG.warn("Importing geonames data from {} file.", unpackedFile.getName());
        try (FileInputStream inputStream = new FileInputStream(unpackedFile);
                Scanner sc = new Scanner(inputStream, "UTF-8")) {

            final List<Geoname> listToSave = new ArrayList<>(BATCH_SIZE);
            long lineNumber = 0;

            while (sc.hasNextLine()) {
                final String line = sc.nextLine();
                lineNumber++;

                // Limit -1 keeps trailing empty columns, which a plain split would drop.
                final String[] values = line.split("\t", -1);
                if (values.length < COLUMN_COUNT) {
                    LOG.warn("Skipping line {}: expected {} columns but found {}", lineNumber, COLUMN_COUNT, values.length);
                    continue;
                }

                try {
                    final Long geonameId = Long.valueOf(values[GEONAME_ID].trim());
                    Geoname geoname = geonameRepository.findOne(geonameId);
                    if (geoname == null) {
                        geoname = new Geoname();
                    }
                    extractParsedLineIntoGeoname(geoname, values);
                    listToSave.add(geoname);
                } catch (final NumberFormatException e) {
                    LOG.warn("Skipping line {}: {}", lineNumber, e.getMessage());
                    continue;
                }

                // Flush only after the current record was added, so no line is lost at a batch boundary.
                if (listToSave.size() >= BATCH_SIZE) {
                    processData(listToSave);
                }
            }

            processData(listToSave);

            // Scanner hides read failures and simply reports "no more lines"; surface them here.
            final IOException readError = sc.ioException();
            if (readError != null) {
                throw readError;
            }
            LOG.info("Done importing geonames database");
        }
    }

    /**
     * Extract data from line into Geoname instance.
     *
     * @param geoname the instance to fill
     * @param values the columns of one dump line; must hold at least {@link #COLUMN_COUNT} elements
     * @throws NumberFormatException when a non-empty numeric column cannot be parsed
     */
    private void extractParsedLineIntoGeoname(final Geoname geoname, final String[] values) {
        geoname.setId(Long.valueOf(values[GEONAME_ID].trim()));
        geoname.setName(values[NAME]);
        geoname.setAsciiname(values[ASCII_NAME]);
        // TODO: alternate names containing some non-ASCII characters fail to save
        // (SQLException: Incorrect string value: '\xF0\x90\x8C\xB0\xF0\x90...' for column 'alternatenames'),
        // so as a workaround every non-ASCII character is stripped.
        // NOTE(review): the bytes in that error form 4-byte UTF-8 sequences; presumably the column
        // charset cannot store those - confirm, then narrow this filter.
        geoname.setAlternatenames(values[ALTERNATE_NAMES].replaceAll("[^\\p{ASCII}]", ""));
        geoname.setLatitude(!values[LATITUDE].isEmpty() ? Double.valueOf(values[LATITUDE]) : null);
        geoname.setLongitude(!values[LONGITUDE].isEmpty() ? Double.valueOf(values[LONGITUDE]) : null);
        geoname.setFeatureClass(values[FEATURE_CLASS]);
        geoname.setFeatureCode(values[FEATURE_CODE]);
        geoname.setCountryCode(values[COUNTRY_CODE]);
        geoname.setCc2(values[CC2]);
        geoname.setAdmin1Code(values[ADMIN1_CODE]);
        geoname.setAdmin2Code(values[ADMIN2_CODE]);
        geoname.setAdmin3Code(values[ADMIN3_CODE]);
        geoname.setAdmin4Code(values[ADMIN4_CODE]);
        geoname.setPopulation(!values[POPULATION].isEmpty() ? Long.valueOf(values[POPULATION]) : null);
        geoname.setElevation(!values[ELEVATION].isEmpty() ? Integer.valueOf(values[ELEVATION]) : null);
        geoname.setDem(!values[DEM].isEmpty() ? Integer.valueOf(values[DEM]) : null);
        geoname.setTimezone(values[TIMEZONE]);
        try {
            // SimpleDateFormat is not thread-safe, hence a fresh instance per call.
            geoname.setModificationDate(!values[MODIFICATION_DATE].isEmpty()
                    ? new SimpleDateFormat(MODIFICATION_DATE_PATTERN).parse(values[MODIFICATION_DATE])
                    : null);
        } catch (final java.text.ParseException ex) {
            // An unreadable date is not worth losing the record for; store it without one.
            LOG.debug("Unparsable modification date '{}'", values[MODIFICATION_DATE]);
            geoname.setModificationDate(null);
        }
    }

    /**
     * Saves the collected records and empties the given list so it can be
     * refilled. A failed save is logged and the import carries on with the
     * next batch.
     *
     * @param bulk the records collected so far; cleared by this call
     */
    private void processData(final List<Geoname> bulk) {
        if (bulk.isEmpty()) {
            return;
        }
        final List<Geoname> copy = new ArrayList<>(bulk);
        bulk.clear();
        try {
            geonamesService.update(copy);
        } catch (final Exception e) {
            LOG.error("Some data bulk was not saved, read logs!", e);
        }
    }

    /**
     * @return {@code true} while an update is queued or in progress
     */
    public static synchronized boolean isRunning() {
        return RUNNING;
    }

    /**
     * Atomically claims the running flag.
     *
     * @return {@code true} when the flag was free and is now set, {@code false} when it was already set
     */
    private static synchronized boolean tryMarkRunning() {
        if (RUNNING) {
            return false;
        }
        RUNNING = true;
        return true;
    }

    /** Releases the running flag. */
    private static synchronized void markStopped() {
        RUNNING = false;
    }
}
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.common.model;
import org.genesys.blocks.model.AuditedVersionedModelWithoutId;
import javax.persistence.Cacheable;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Lob;
import javax.persistence.Table;
import java.util.Date;
/**
* @author Maxym Borodenko
*/
@Entity
@Cacheable
@Table(name = "geoname")
public class Geoname extends AuditedVersionedModelWithoutId {
/**
* the Id value that geonames.org provides
*/
@Id
@Column(name = "geoname_id", unique = true, nullable = false)
private Long id;
/**
* name of geographical point
*/
@Column(length = 200, nullable = false)
private String name;
/**
* name of geographical point in plain ascii characters
*/
@Column(length = 200)
private String asciiname;
/**
* alternate names
*/
@Lob
@Column
private String alternatenames;
/**
* latitude in decimal degrees
*/
@Column
private Double latitude;
/**
* longitude in decimal degrees
*/
@Column
private Double longitude;
/**
* feature class
*/
@Column(name = "feature_class", length = 1)
private String featureClass;
/**
* feature code
*/
@Column(name = "feature_code", length = 10)
private String featureCode;
/**
* ISO-3166 2-letter country code
*/
@Column(name = "country_code", length = 2)
private String countryCode;
/**
* alternate country codes, comma separated, ISO-3166 2-letter country code
*/
@Column(name = "cc2", length = 200)
private String cc2;
/**
* fipscode (subject to change to iso code)
*/
@Column(name = "admin1_code", length = 20)
private String admin1Code;
/**
* code for the second administrative division
*/
@Column(name = "admin2_code", length = 80)
private String admin2Code;
/**
* code for third level administrative division
*/
@Column(name = "admin3_code", length = 20)
private String admin3Code;
/**
* code for fourth level administrative division
*/
@Column(name = "admin4_code", length = 20)
private String admin4Code;
/**
* the population
*/
@Column
private Long population;
/**
* the elevation in meters
*/
@Column
private Integer elevation;
/**
* digital elevation model
*/
@Column
private Integer dem;
/**
* the timezone
*/
@Column(length = 40)
private String timezone;
/**
* the modification date
*/
@Column(name = "modification_date")
private Date modificationDate;
/**
* Gets the id (the Id value that geonames.org provides).
*
* @return the id
*/
public Long getId() {
return id;
}
/**
* Sets the id (the Id value that geonames.org provides).
*
* @param id the id to set
*/
public void setId(final Long id) {
this.id = id;
}
/**
* Gets the name of the geographical point.
*
* @return the name
*/
public String getName() {
return name;
}
/**
* Sets the name of the geographical point.
*
* @param name the name to set
*/
public void setName(final String name) {
this.name = name;
}
/**
* Gets the ascii name (name of the geographical point in plain ascii characters).
*
* @return the ascii name
*/
public String getAsciiname() {
return asciiname;
}
/**
* Sets the ascii name (name of the geographical point in plain ascii characters).
*
* @param asciiname the ascii name to set
*/
public void setAsciiname(final String asciiname) {
this.asciiname = asciiname;
}
/**
* Gets the alternate names, held as a single string.
*
* @return the alternate names
*/
public String getAlternatenames() {
return alternatenames;
}
/**
* Sets the alternate names, held as a single string.
*
* @param alternatenames the alternate names to set
*/
public void setAlternatenames(final String alternatenames) {
this.alternatenames = alternatenames;
}
/**
* Gets the latitude in decimal degrees.
*
* @return the latitude, possibly {@code null}
*/
public Double getLatitude() {
return latitude;
}
/**
* Sets the latitude in decimal degrees.
*
* @param latitude the latitude to set
*/
public void setLatitude(final Double latitude) {
this.latitude = latitude;
}