Commit 67396eeb authored by Matija Obreza
Browse files

Added Dataset Descriptors and updated SGSV upload

parent 18aa8c47
......@@ -18,11 +18,14 @@ package org.crophub.rest.common.model.impl;
import java.text.MessageFormat;
import java.util.Date;
import java.util.List;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Lob;
import javax.persistence.ManyToOne;
import javax.persistence.OneToMany;
import javax.persistence.OrderBy;
import javax.persistence.Table;
import org.crophub.rest.common.model.BusinessModel;
......@@ -49,8 +52,13 @@ public class Dataset extends BusinessModel {
@Lob
private String mapping;
private Date uploadDate;
@OrderBy("orderIndex")
@OneToMany(cascade={}, mappedBy="dataset")
private List<DatasetDescriptor> datasetDescriptors;
public License getLicense() {
return license;
}
......@@ -117,6 +125,15 @@ public class Dataset extends BusinessModel {
this.mapping = mapping;
}
public List<DatasetDescriptor> getDatasetDescriptors() {
return datasetDescriptors;
}
public void setDatasetDescriptors(List<DatasetDescriptor> datasetDescriptors) {
this.datasetDescriptors = datasetDescriptors;
}
@Override
public String toString() {
return MessageFormat.format("Dataset id={0,number,#} date={1,date,dd/MM/yyyy} name={2}", id, uploadDate, name);
......
/**
* Copyright 2013 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.crophub.rest.common.model.impl;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.JoinColumn;
import javax.persistence.ManyToOne;
import javax.persistence.Table;
import javax.persistence.UniqueConstraint;
import org.crophub.rest.common.model.BusinessModel;
/**
 * Join entity linking a {@link Dataset} to a {@link Descriptor}, recording the
 * column position ({@code orderIndex}) of the descriptor within the dataset.
 * The (datasetId, descriptorId) pair is unique per the table constraint below.
 */
@Entity
@Table(name = "datasetdescriptor", uniqueConstraints = { @UniqueConstraint(columnNames = { "datasetId", "descriptorId" }) })
public class DatasetDescriptor extends BusinessModel {
private static final long serialVersionUID = 2413430585742976014L;
// Owning dataset; required (optional = false)
@ManyToOne(optional = false)
@JoinColumn(name = "datasetId")
private Dataset dataset;
// Column position of this descriptor within the dataset (used for ordering,
// see @OrderBy("orderIndex") on Dataset.datasetDescriptors)
@Column(nullable = false)
private int orderIndex;
// Referenced descriptor; required (optional = false)
@ManyToOne(optional = false)
@JoinColumn(name = "descriptorId")
private Descriptor descriptor;
public Dataset getDataset() {
return dataset;
}
public void setDataset(Dataset dataset) {
this.dataset = dataset;
}
public int getOrderIndex() {
return orderIndex;
}
public void setOrderIndex(int orderIndex) {
this.orderIndex = orderIndex;
}
public Descriptor getDescriptor() {
return descriptor;
}
public void setDescriptor(Descriptor descriptor) {
this.descriptor = descriptor;
}
}
......@@ -34,10 +34,10 @@ import org.hibernate.annotations.Index;
@Entity
@Table(name = "sparsedata")
@org.hibernate.annotations.Table(appliesTo = "sparsedata", indexes = {
@Index(columnNames = { "descriptorId", "stringId" }, name = "descriptorstring_SPARSEDATA"),
@Index(columnNames = { "datasetDescriptorId", "stringId" }, name = "descriptorstring_SPARSEDATA"),
@Index(columnNames = { "sparseEntryId" }, name = "entry_SPARSEDATA"),
@Index(columnNames = { "datasetId", "descriptorId" }, name = "datasetdescriptor_SPARSEDATA"),
@Index(columnNames = { "descriptorId" }, name = "descriptor_SPARSEDATA") })
@Index(columnNames = { "datasetId", "datasetDescriptorId" }, name = "datasetdescriptor_SPARSEDATA"),
@Index(columnNames = { "datasetDescriptorId" }, name = "descriptor_SPARSEDATA") })
public class SparseData extends BusinessModel {
private static final long serialVersionUID = -2142036544458439223L;
......@@ -48,7 +48,7 @@ public class SparseData extends BusinessModel {
private long sparseEntryId;
@OrderColumn
private long descriptorId;
private long datasetDescriptorId;
@Column(nullable = false, length = 500)
private String value;
......@@ -73,12 +73,12 @@ public class SparseData extends BusinessModel {
this.sparseEntryId = sparseEntryId;
}
public long getDescriptorId() {
return descriptorId;
public long getDatasetDescriptorId() {
return datasetDescriptorId;
}
public void setDescriptorId(final long descriptorId) {
this.descriptorId = descriptorId;
public void setDatasetDescriptorId(final long datasetDescriptorId) {
this.datasetDescriptorId = datasetDescriptorId;
}
public String getValue() {
......
/**
* Copyright 2013 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.crophub.rest.common.persistence.domain;
import java.util.List;
import org.crophub.rest.common.model.impl.Dataset;
import org.crophub.rest.common.model.impl.DatasetDescriptor;
import org.crophub.rest.common.model.impl.Descriptor;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
/**
 * Spring Data JPA repository for {@link DatasetDescriptor} entities.
 */
public interface DatasetDescriptorRepository extends JpaRepository<DatasetDescriptor, Long> {
/** Returns the {@link Descriptor}s of a dataset, ordered by {@code orderIndex}. */
@Query("select dsd.descriptor from DatasetDescriptor dsd where dsd.dataset=?1 order by dsd.orderIndex")
List<Descriptor> findDescriptorsByDataset(Dataset dataset);
/** Returns all dataset-descriptor links of the given dataset. */
List<DatasetDescriptor> findByDataset(Dataset dataset);
/** Returns the link for a (dataset, descriptor) pair, or {@code null} if none exists. */
DatasetDescriptor findByDatasetAndDescriptor(Dataset dataset, Descriptor descriptor);
}
......@@ -19,7 +19,6 @@ package org.crophub.rest.common.persistence.domain;
import java.util.List;
import org.crophub.rest.common.model.impl.SparseData;
import org.crophub.rest.common.model.impl.SparseString;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
......@@ -31,26 +30,26 @@ public interface SparseDataRepository extends JpaRepository<SparseData, Long> {
@Cacheable(value="sparsedata", key="#root.methodName+#a0")
long countByDatasetId(long datasetId);
@Query("select distinct sd.descriptorId from SparseData sd where sd.datasetId=?1")
@Cacheable(value="sparsedata", key="#root.methodName+#a0")
List<Long> getDescriptorIdsByDatasetId(long datasetId);
// @Query("select distinct sd.descriptorId from SparseData sd where sd.datasetId=?1")
// @Cacheable(value="sparsedata", key="#root.methodName+#a0")
// List<Long> getDescriptorIdsByDatasetId(long datasetId);
@Query("select count(sd) from SparseData sd where sd.sparseEntryId=?1")
long countBySparseEntryId(long sparseEntryId);
List<SparseData> findByDescriptorId(long descriptorId);
//List<SparseData> findByDescriptorId(long descriptorId);
@Query("select count(sd) from SparseData sd where sd.descriptorId=?1")
long countByDescriptorId(long descriptorId);
//@Query("select count(sd) from SparseData sd where sd.descriptorId=?1")
//long countByDescriptorId(long descriptorId);
List<SparseData> findByDescriptorIdAndValue(long descriptorId, SparseString value);
//List<SparseData> findByDescriptorIdAndValue(long descriptorId, SparseString value);
@Query("select count(sd) from SparseData sd where sd.descriptorId=?1 and sd.value=?2")
long countByDescriptorIdAndValue(long descriptorId, SparseString value);
//@Query("select count(sd) from SparseData sd where sd.descriptorId=?1 and sd.value=?2")
//long countByDescriptorIdAndValue(long descriptorId, SparseString value);
List<SparseData> findByDescriptorIdAndSparseString(long descriptorId,
SparseString string);
//List<SparseData> findByDescriptorIdAndSparseString(long descriptorId,
// SparseString string);
@Query("select count(sd) from SparseData sd where sd.descriptorId=?1 and sd.sparseString=?2")
long countByDescriptorIdAndSparseString(long descriptorId, SparseString string);
//@Query("select count(sd) from SparseData sd where sd.descriptorId=?1 and sd.sparseString=?2")
//long countByDescriptorIdAndSparseString(long descriptorId, SparseString string);
}
......@@ -19,10 +19,15 @@ package org.crophub.rest.common.service;
import java.util.List;
import org.crophub.rest.common.model.impl.Dataset;
import org.crophub.rest.common.model.impl.DatasetDescriptor;
import org.crophub.rest.common.model.impl.Descriptor;
public interface DescriptorService {
/** Lists all known descriptors. */
List<Descriptor> list();
/**
 * Fetches existing dataset-descriptor links for the given descriptors.
 * The returned array is parallel to {@code descriptors}; an entry is
 * {@code null} when the descriptor is {@code null} or no link exists yet.
 */
DatasetDescriptor[] getDatasetDescriptors(Dataset dataset, Descriptor[] descriptors);
/**
 * Same as {@link #getDatasetDescriptors(Dataset, Descriptor[])} but creates
 * and persists any missing links (order index = array position).
 */
DatasetDescriptor[] ensureDatasetDescriptors(Dataset dataset, Descriptor[] descriptors);
}
......@@ -7,25 +7,26 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.crophub.rest.common.model.impl.Dataset;
import org.crophub.rest.common.model.impl.DatasetDescriptor;
import org.crophub.rest.common.model.impl.Descriptor;
import org.crophub.rest.common.model.impl.SparseData;
import org.crophub.rest.common.model.impl.SparseEntry;
import org.crophub.rest.common.persistence.domain.DatasetDescriptorRepository;
import org.crophub.rest.common.persistence.domain.DatasetRepository;
import org.crophub.rest.common.persistence.domain.DescriptorRepository;
import org.crophub.rest.common.persistence.domain.SparseDataRepository;
import org.crophub.rest.common.persistence.domain.SparseEntryRepository;
import org.crophub.rest.common.service.DataService;
import org.crophub.rest.common.service.DescriptorService;
import org.crophub.rest.common.service.SparseStringService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.CacheEvict;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
@Service
@Transactional(readOnly = true)
public class DataServiceImpl implements DataService {
public static final Log LOG = LogFactory
.getLog(DataServiceImpl.class);
public static final Log LOG = LogFactory.getLog(DataServiceImpl.class);
@Autowired
SparseEntryRepository entryRepository;
......@@ -34,7 +35,10 @@ public class DataServiceImpl implements DataService {
SparseDataRepository dataRepository;
@Autowired
DescriptorRepository descriptorRepository;
DatasetDescriptorRepository datasetDescriptorRepository;
@Autowired
DescriptorService descriptorService;
@Autowired
SparseStringService stringService;
......@@ -52,9 +56,8 @@ public class DataServiceImpl implements DataService {
return datasetRepository.findOne(datasetId);
}
@Override
@Transactional(readOnly=false)
@Transactional(readOnly = false)
public void save(Dataset dataset) {
datasetRepository.save(dataset);
}
......@@ -71,20 +74,19 @@ public class DataServiceImpl implements DataService {
@Override
public Iterable<Descriptor> getDescriptors(Dataset dataset) {
return descriptorRepository.findAll(dataRepository.getDescriptorIdsByDatasetId(dataset.getId()));
return datasetDescriptorRepository.findDescriptorsByDataset(dataset);
}
// FIXME This code does not handle column grouping
@Override
@Transactional(readOnly = false)
@CacheEvict(value="methods", allEntries=true)
public void writeEntries(final Dataset dataset, final Descriptor[] descriptors,
final List<String[]> datas) {
@Transactional(propagation = Propagation.REQUIRED, readOnly = false)
public void writeEntries(final Dataset dataset, final Descriptor[] descriptors, final List<String[]> datas) {
LOG.debug("Writing batch of " + datas.size());
final DatasetDescriptor[] datasetDescriptors = descriptorService.getDatasetDescriptors(dataset, descriptors);
final List<SparseEntry> sparseEntries = new ArrayList<SparseEntry>(
datas.size());
final List<SparseData> sparseDatas = new ArrayList<SparseData>(
descriptors.length);
final List<SparseEntry> sparseEntries = new ArrayList<SparseEntry>(datas.size());
final List<SparseData> sparseDatas = new ArrayList<SparseData>(descriptors.length);
for (int j = 0; j < datas.size(); j++) {
// New entry
......@@ -95,6 +97,9 @@ public class DataServiceImpl implements DataService {
// save the lot
entryRepository.save(sparseEntries);
LOG.debug("sparseEntries.size=" + sparseEntries.size());
long datasetId = dataset.getId();
for (int j = 0; j < datas.size(); j++) {
final String[] data = datas.get(j);
......@@ -111,15 +116,19 @@ public class DataServiceImpl implements DataService {
valueCount++;
final SparseData sparseData = new SparseData();
sparseData.setSparseEntryId(sparseEntryId);
sparseData.setDescriptorId(descriptors[i].getId());
// Can throw NPE! Cool :-)
sparseData.setDatasetDescriptorId(datasetDescriptors[i].getId());
sparseData.setDatasetId(datasetId);
sparseData.setValue(data[i]);
sparseDatas.add(sparseData);
}
}
if (valueCount == 0) {
entryRepository.delete(sparseEntries.remove(j));
LOG.warn("No data, removing entry");
entryRepository.delete(sparseEntries.get(j));
}
}
dataRepository.save(sparseDatas);
}
}
......@@ -2,22 +2,86 @@ package org.crophub.rest.common.service.impl;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.crophub.rest.common.model.impl.Dataset;
import org.crophub.rest.common.model.impl.DatasetDescriptor;
import org.crophub.rest.common.model.impl.Descriptor;
import org.crophub.rest.common.persistence.domain.DatasetDescriptorRepository;
import org.crophub.rest.common.persistence.domain.DescriptorRepository;
import org.crophub.rest.common.service.DescriptorService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
/**
 * Default {@link DescriptorService} implementation backed by JPA repositories.
 */
@Service
@Transactional(readOnly = true)
public class DescriptorServiceImpl implements DescriptorService {

    public static final Log LOG = LogFactory.getLog(DescriptorServiceImpl.class);

    @Autowired
    private DescriptorRepository descriptorRepository;

    @Autowired
    private DatasetDescriptorRepository datasetDescriptorRepository;

    @Override
    public List<Descriptor> list() {
        return descriptorRepository.findAll();
    }

    /**
     * Fetch existing {@link DatasetDescriptor}s for the given descriptors.
     *
     * <p>The returned array is parallel to {@code descriptors}; an entry stays
     * {@code null} when the descriptor is {@code null} or no link exists yet,
     * so callers must handle sparse results.
     *
     * <p>No method-level {@code @Transactional} here: this is a pure read and
     * inherits the class-level {@code readOnly = true} transaction. (A bare
     * method-level {@code @Transactional} would silently reset
     * {@code readOnly} to {@code false}.)
     *
     * @param dataset the dataset whose descriptor links are looked up
     * @param descriptors ordered descriptors; {@code null} entries are skipped
     * @return array parallel to {@code descriptors}, possibly containing nulls
     */
    @Override
    public DatasetDescriptor[] getDatasetDescriptors(final Dataset dataset, final Descriptor[] descriptors) {
        final DatasetDescriptor[] datasetDescriptors = new DatasetDescriptor[descriptors.length];
        for (int i = 0; i < descriptors.length; i++) {
            if (descriptors[i] == null) {
                // Skip null descriptor
                continue;
            }
            datasetDescriptors[i] = datasetDescriptorRepository.findByDatasetAndDescriptor(dataset, descriptors[i]);
        }
        return datasetDescriptors;
    }

    /**
     * Fetch {@link DatasetDescriptor}s, creating and persisting any that do
     * not exist yet (order index = array position).
     *
     * <p>Runs in a new transaction ({@code REQUIRES_NEW}) so created links are
     * committed independently of any surrounding transaction.
     *
     * @param dataset the dataset whose descriptor links are ensured
     * @param descriptors ordered descriptors; {@code null} entries are skipped
     * @return array parallel to {@code descriptors}; entries are {@code null}
     *         only where the corresponding descriptor was {@code null}
     */
    @Override
    @Transactional(propagation = Propagation.REQUIRES_NEW, readOnly = false)
    public DatasetDescriptor[] ensureDatasetDescriptors(final Dataset dataset, final Descriptor[] descriptors) {
        final DatasetDescriptor[] datasetDescriptors = new DatasetDescriptor[descriptors.length];
        for (int i = 0; i < descriptors.length; i++) {
            if (descriptors[i] == null) {
                // Skip null descriptor
                continue;
            }
            DatasetDescriptor dsd = datasetDescriptorRepository.findByDatasetAndDescriptor(dataset, descriptors[i]);
            if (dsd == null) {
                LOG.info("Adding new dataset descriptor for " + descriptors[i]);
                dsd = new DatasetDescriptor();
                dsd.setDataset(dataset);
                dsd.setDescriptor(descriptors[i]);
                dsd.setOrderIndex(i);
                dsd = datasetDescriptorRepository.save(dsd);
            }
            datasetDescriptors[i] = dsd;
        }
        LOG.info("Done with dataset descriptors");
        return datasetDescriptors;
    }
}
......@@ -52,6 +52,7 @@ import org.crophub.rest.common.model.impl.Dataset;
import org.crophub.rest.common.model.impl.Descriptor;
import org.crophub.rest.common.persistence.domain.DescriptorRepository;
import org.crophub.rest.common.service.DataService;
import org.crophub.rest.common.service.DescriptorService;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
......@@ -82,6 +83,9 @@ public class SGSVUpdate extends RunnerTest {
@Autowired
private DataService dataService;
@Autowired
private DescriptorService descriptorService;
@Test
public void updateSGSV() {
......@@ -117,7 +121,9 @@ public class SGSVUpdate extends RunnerTest {
@Test
public void updateSGSVFile() {
final File file = new File(filesPath, "sgsv_templates_20130502.tab");
// sgsv_templates_20130610.tab
// sgsv_templates_20130502.tab
final File file = new File(filesPath, "sgsv_templates_20130610.tab");
FileInputStream fis = null;
try {
fis = new FileInputStream(file);
......@@ -140,6 +146,8 @@ public class SGSVUpdate extends RunnerTest {
reader = new CSVReader(new BufferedReader(new InputStreamReader(str)), '\t', '"', false);
final String[] headers = reader.readNext();
LOG.debug("Headers: " + headers.length);
assertTrue(headers.length == 30);
final Descriptor[] descriptors = new Descriptor[headers.length];
descriptors[0] = descriptorRepository.findByCode("SGSV_ID");
......@@ -165,9 +173,6 @@ public class SGSVUpdate extends RunnerTest {
descriptors[27] = descriptorRepository.findByCode("VERNACULAR_NAME");
descriptors[29] = descriptorRepository.findByCode("SGSV_GENUSID");
LOG.debug("Headers: " + headers.length);
assertTrue(headers.length == 30);
final ColumnGroups grouping = new ColumnGroups();
final Map<Integer, Long> traitMap = new HashMap<Integer, Long>();
......@@ -184,7 +189,7 @@ public class SGSVUpdate extends RunnerTest {
}
final Dataset dataset = new Dataset();
dataset.setName("SGSV update");
dataset.setName("SGSV update " + new Date());
dataset.setHeaders(mapper.writeValueAsString(headers));
final DataMapping dataMapping = new DataMapping();
......@@ -196,6 +201,9 @@ public class SGSVUpdate extends RunnerTest {
dataset.setUploadDate(new Date());
dataService.save(dataset);
// Ensure descriptors exist
descriptorService.ensureDatasetDescriptors(dataset, descriptors);
final List<String[]> bulk = new ArrayList<String[]>(1000);
String[] line = null;
......@@ -249,7 +257,15 @@ public class SGSVUpdate extends RunnerTest {
}
}
threadPool.execute(new SparseDataUpdater(dataService, dataset, descriptors, new ArrayList<String[]>(bulk)));
// Need copy!
final ArrayList<String[]> bulkCopy = new ArrayList<String[]>(bulk);
threadPool.execute(new Runnable() {
@Override
public void run() {
dataService.writeEntries(dataset, descriptors, bulkCopy);
}
});
}
private void shutdownPool() {
......@@ -269,24 +285,4 @@ public class SGSVUpdate extends RunnerTest {
}
}
private static class SparseDataUpdater implements Runnable {
private final Dataset dataset;
private final Descriptor[] descriptors;
private final ArrayList<String[]> bulk;
private final DataService dataService;
public SparseDataUpdater(final DataService dataService, final Dataset dataset, final Descriptor[] descriptors, final ArrayList<String[]> bulk) {
this.dataService = dataService;
this.dataset = dataset;
this.descriptors = descriptors;
this.bulk = bulk;
}
@Override
public void run() {
dataService.writeEntries(dataset, descriptors, bulk);
}
}
}
#-------------------------------------------------------------------------------
# Copyright 2013 Global Crop Diversity Trust
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-------------------------------------------------------------------------------
### direct log messages to stdout ###
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.File=./logs/auth.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
### set log levels - for more verbose logging change 'info' to 'debug' ###
log4j.rootLogger=info, stdout
log4j.category.org.crophub=info