Commit 62c2646b authored by Matija Obreza's avatar Matija Obreza

More efficient download of MCPD for Subsets and Datasets

parent 36250557
......@@ -191,6 +191,6 @@ public class DatasetAccessionRefRepositoryCustomImpl implements DatasetAccession
@Override
public List<DatasetAccessionRef> findAll(BooleanExpression in) {
return jpaQueryFactory.select(QDatasetAccessionRef.datasetAccessionRef).where(in).fetch();
return jpaQueryFactory.selectFrom(QDatasetAccessionRef.datasetAccessionRef).where(in).fetch();
}
}
......@@ -33,6 +33,8 @@ import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.validation.Valid;
import org.genesys.blocks.security.service.CustomAclService;
......@@ -68,6 +70,7 @@ import org.genesys2.server.model.UserRole;
import org.genesys2.server.model.genesys.Accession;
import org.genesys2.server.model.genesys.AccessionRef;
import org.genesys2.server.model.genesys.QAccession;
import org.genesys2.server.model.impl.QSubsetAccessionRef;
import org.genesys2.server.service.DownloadService;
import org.genesys2.server.service.worker.AccessionRefMatcher;
import org.genesys2.util.JPAUtils;
......@@ -79,6 +82,7 @@ import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.repository.support.Querydsl;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.security.access.prepost.PostAuthorize;
import org.springframework.security.access.prepost.PreAuthorize;
......@@ -93,7 +97,10 @@ import org.springframework.web.multipart.MultipartFile;
import com.google.common.collect.Lists;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.PathBuilder;
import com.querydsl.core.types.dsl.PathBuilderFactory;
import com.querydsl.jpa.JPAExpressions;
import com.querydsl.jpa.JPQLQuery;
/**
* The Class DatasetServiceImpl.
......@@ -155,6 +162,9 @@ public class DatasetServiceImpl implements DatasetService {
@Autowired
private AccessionRefMatcher accessionRefMatcher;
@PersistenceContext
private EntityManager entityManager;
/**
* {@inheritDoc}
*/
......@@ -339,7 +349,19 @@ public class DatasetServiceImpl implements DatasetService {
public void writeXlsxMCPD(Dataset dataset, OutputStream outputStream) throws IOException {
Predicate predicate = QAccession.accession.id.in(JPAExpressions.selectFrom(QDatasetAccessionRef.datasetAccessionRef)
.select(QDatasetAccessionRef.datasetAccessionRef.accession.id).where(QDatasetAccessionRef.datasetAccessionRef.dataset.eq(dataset)));
downloadService.writeXlsxMCPD(predicate, outputStream);
PathBuilder<Accession> builder = new PathBuilderFactory().create(Accession.class);
Querydsl querydsl = new Querydsl(entityManager, builder);
JPQLQuery<Long> queryAccessionId = querydsl.createQuery(QSubsetAccessionRef.subsetAccessionRef)
// select id only
.select(QSubsetAccessionRef.subsetAccessionRef.accession.id)
// order by id
.orderBy(QSubsetAccessionRef.subsetAccessionRef.accession.id.asc());
// Apply where
queryAccessionId.where(predicate);
downloadService.writeXlsxMCPD(queryAccessionId, outputStream, "", "/datasets/" + dataset.getUuid());
}
/**
......@@ -970,6 +992,10 @@ public class DatasetServiceImpl implements DatasetService {
@Override
@Transactional(readOnly = false, propagation = Propagation.REQUIRED, isolation = Isolation.READ_UNCOMMITTED)
public int clearAccessionRefs(Collection<Accession> accessions) {
if (accessions == null || accessions.isEmpty()) {
return 0;
}
List<DatasetAccessionRef> referencedRefs = accessionRefRepository.findAll(QDatasetAccessionRef.datasetAccessionRef.accession.in(accessions));
referencedRefs.forEach((ref) -> {
......
......@@ -202,6 +202,6 @@ public class SubsetAccessionRefRepositoryCustomImpl implements SubsetAccessionRe
@Override
public List<SubsetAccessionRef> findAll(BooleanExpression in) {
return jpaQueryFactory.select(QSubsetAccessionRef.subsetAccessionRef).where(in).fetch();
return jpaQueryFactory.selectFrom(QSubsetAccessionRef.subsetAccessionRef).where(in).fetch();
}
}
......@@ -22,6 +22,8 @@ import java.io.OutputStreamWriter;
import java.util.List;
import com.querydsl.core.types.Predicate;
import com.querydsl.jpa.JPQLQuery;
import org.genesys.catalog.model.dataset.Dataset;
import org.genesys.catalog.model.traits.Descriptor;
import org.genesys2.server.model.genesys.Metadata;
......@@ -38,7 +40,9 @@ public interface DownloadService {
void writeXlsxMCPD(AccessionFilter accessionFilter, OutputStream outputStream) throws IOException;
void writeXlsxMCPD(Predicate predicate, OutputStream outputStream) throws IOException;
void writeXlsxMCPD(Predicate predicate, OutputStream outputStream, String filters, String dataSource) throws IOException;
void writeXlsxMCPD(JPQLQuery<Long> queryAccessionId, OutputStream outputStream, String filters, String dataSource) throws IOException;
void writeXlsxPDCI(AccessionFilter accessionFilter, OutputStream outputStream) throws IOException;
......@@ -52,4 +56,6 @@ public interface DownloadService {
void writeXlsxGenesys1Descriptors(Crop crop, OutputStream outputStream) throws IOException;
}
......@@ -33,7 +33,14 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.PathBuilder;
import com.querydsl.core.types.dsl.PathBuilderFactory;
import com.querydsl.jpa.JPQLQuery;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.POIXMLProperties;
import org.apache.poi.common.usermodel.HyperlinkType;
......@@ -69,6 +76,7 @@ import org.genesys2.server.model.genesys.ExperimentAccessionTrait;
import org.genesys2.server.model.genesys.Metadata;
import org.genesys2.server.model.genesys.PDCI;
import org.genesys2.server.model.genesys.Parameter;
import org.genesys2.server.model.genesys.QAccession;
import org.genesys2.server.model.genesys.Taxonomy2;
import org.genesys2.server.model.genesys.TraitCode;
import org.genesys2.server.model.impl.Country;
......@@ -90,6 +98,7 @@ import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.i18n.LocaleContextHolder;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.repository.support.Querydsl;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
......@@ -167,6 +176,9 @@ public class DownloadServiceImpl implements DownloadService {
private String genesysCatalogUrl;
private HashMap<String, Method> pdciMethods;
@PersistenceContext
private EntityManager entityManager;
private static class WorkbookStyles {
CellStyle dateStyle;
......@@ -189,11 +201,23 @@ public class DownloadServiceImpl implements DownloadService {
}
@Override
public void writeXlsxMCPD(Predicate predicate, OutputStream outputStream) throws IOException {
writeXlsxMCPD(predicate, outputStream, "", "");
public void writeXlsxMCPD(Predicate predicate, OutputStream outputStream, String filters, String dataSource) throws IOException {
PathBuilder<Accession> builder = new PathBuilderFactory().create(Accession.class);
Querydsl querydsl = new Querydsl(entityManager, builder);
JPQLQuery<Long> query = querydsl.createQuery(QAccession.accession)
// select id only
.select(QAccession.accession.id)
// order by id
.orderBy(QAccession.accession.id.asc());
// apply filter
query.where(predicate);
writeXlsxMCPD(query, outputStream, filters, dataSource);
}
private void writeXlsxMCPD(Predicate predicate, OutputStream outputStream, String filters, String dataSource) throws IOException {
@Override
public void writeXlsxMCPD(JPQLQuery<Long> queryAccessionId, OutputStream outputStream, String filters, String dataSource) throws IOException {
XSSFWorkbook template = new XSSFWorkbook(getClass().getResourceAsStream("/template/download/MCPD.xlsx"));
POIXMLProperties props = template.getProperties();
......@@ -239,12 +263,12 @@ public class DownloadServiceImpl implements DownloadService {
// Write accession information
final Writer writer = new Writer();
try {
accessionProcessor.process(predicate, (accessions) -> {
accessionProcessor.process(queryAccessionId, (accessions) -> {
for (Accession accession: accessions) {
writer.processMCPD(sheet, wbStyles, accession);
}
return accessions;
});
}, null);
LOG.warn("Done streaming MCPD rows");
} catch (Exception e) {
LOG.warn("Error generating: {}", e.getMessage());
......
......@@ -70,6 +70,7 @@ import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.repository.support.Querydsl;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.security.access.prepost.PostAuthorize;
import org.springframework.security.access.prepost.PreAuthorize;
......@@ -83,7 +84,10 @@ import org.springframework.validation.annotation.Validated;
import com.google.common.collect.Lists;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.PathBuilder;
import com.querydsl.core.types.dsl.PathBuilderFactory;
import com.querydsl.jpa.JPAExpressions;
import com.querydsl.jpa.JPQLQuery;
/**
* The Class SubsetServiceImpl.
......@@ -674,6 +678,10 @@ public class SubsetServiceImpl implements SubsetService {
@Override
@Transactional(readOnly = false, propagation = Propagation.REQUIRED, isolation = Isolation.READ_UNCOMMITTED)
public int clearAccessionRefs(Collection<Accession> accessions) {
if (accessions == null || accessions.isEmpty()) {
return 0;
}
List<SubsetAccessionRef> referencedRefs = accessionRefRepository.findAll(QSubsetAccessionRef.subsetAccessionRef.accession.in(accessions));
referencedRefs.forEach((ref) -> {
......@@ -688,7 +696,19 @@ public class SubsetServiceImpl implements SubsetService {
public void writeXlsxMCPD(Subset subset, OutputStream outputStream) throws IOException {
Predicate predicate = QAccession.accession.id.in(JPAExpressions.selectFrom(QSubsetAccessionRef.subsetAccessionRef)
.select(QSubsetAccessionRef.subsetAccessionRef.accession.id).where(QSubsetAccessionRef.subsetAccessionRef.subset.eq(subset)));
downloadService.writeXlsxMCPD(predicate, outputStream);
PathBuilder<Accession> builder = new PathBuilderFactory().create(Accession.class);
Querydsl querydsl = new Querydsl(entityManager, builder);
JPQLQuery<Long> queryAccessionId = querydsl.createQuery(QSubsetAccessionRef.subsetAccessionRef)
// select id only
.select(QSubsetAccessionRef.subsetAccessionRef.accession.id)
// order by id
.orderBy(QSubsetAccessionRef.subsetAccessionRef.accession.id.asc());
// Apply where
queryAccessionId.where(predicate);
downloadService.writeXlsxMCPD(queryAccessionId, outputStream, "", "/subsets/" + subset.getUuid());
}
/**
......@@ -697,7 +717,9 @@ public class SubsetServiceImpl implements SubsetService {
@Override
@Transactional
@PreAuthorize("(hasRole('ADMINISTRATOR') || hasPermission(#source, 'write')) && #source.published")
public Subset createNewVersion(@Valid final Subset source) {
public Subset createNewVersion(@Valid Subset source) {
source = getSubset(source);
final Subset subset = new Subset();
copyValues(subset, source);
subset.setState(PublishState.DRAFT);
......
......@@ -74,27 +74,35 @@ public class AccessionProcessor {
process(filter.buildPredicate(), action, maxSize);
}
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
public void process(Predicate predicate, IAccessionBatchAction action) throws Exception {
process(predicate, action, null);
}
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
public void process(Predicate predicate, IAccessionBatchAction action, Long maxSize) throws Exception {
long count = accessionRepository.count(predicate);
PathBuilder<Accession> builder = new PathBuilderFactory().create(Accession.class);
Querydsl querydsl = new Querydsl(em, builder);
JPQLQuery<Long> query = querydsl.createQuery(QAccession.accession)
// select id only
.select(QAccession.accession.id)
// order by id
.orderBy(QAccession.accession.id.asc());
// select id only
.select(QAccession.accession.id)
// order by id
.orderBy(QAccession.accession.id.asc());
// apply filter
query.where(predicate);
process(query, action, maxSize);
}
/**
* Advanced usage.
*
* @param accessionIdQuery query statement that returns ordered accessionIds
* (for pagination)
* @param action
* @param maxSize
* @throws Exception
*/
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
public void process(JPQLQuery<Long> accessionIdQuery, IAccessionBatchAction action, Long maxSize) throws Exception {
long count = accessionIdQuery.fetchCount();
int startPosition = 0;
StopWatch stopWatch = new StopWatch();
......@@ -103,19 +111,20 @@ public class AccessionProcessor {
List<Long> results;
do {
query.offset(startPosition);
accessionIdQuery.offset(startPosition);
// Respect maxSize
if (maxSize != null && startPosition + batchSize > maxSize) {
// we would be over allowed number of records
query.limit(maxSize - startPosition);
accessionIdQuery.limit(maxSize - startPosition);
} else {
query.limit(batchSize);
accessionIdQuery.limit(batchSize);
}
stopWatch.split();
LOG.debug("Reading Accessions. Stopwatch={}s {}+{} of {}. Processing at {} accessions/s", stopWatch.getSplitTime() / 1000, startPosition, batchSize, count, (double)(startPosition+batchSize)/(stopWatch.getSplitTime() / 1000));
results = query.fetch();
LOG.debug("Reading Accessions. Stopwatch={}s {}+{} of {}. Processing at {} accessions/s", stopWatch.getSplitTime() / 1000, startPosition, batchSize, count,
(double) (startPosition + batchSize) / (stopWatch.getSplitTime() / 1000));
results = accessionIdQuery.fetch();
loadAndProcess(results, action);
// Start position is updated above.
......@@ -129,10 +138,12 @@ public class AccessionProcessor {
LOG.info("Processing Accessions took {}ms", stopWatch.getTime());
}
private void loadAndProcess(List<Long> accessionIds, IAccessionBatchAction action) throws Exception {
List<Accession> accessions = accessionRepository.findAll(accessionIds);
action.apply(accessions);
}
/**
* Apply action on accessions matching the provided filter.
*
......@@ -144,7 +155,7 @@ public class AccessionProcessor {
apply(filter.buildPredicate(), action);
}
/**
* Apply action on accessions matching the provided filter.
*
......@@ -179,13 +190,14 @@ public class AccessionProcessor {
do {
stopWatch.split();
LOG.debug("Reading Accessions. Stopwatch={}s {}+{} of {}. Processing at {} accessions/s", stopWatch.getSplitTime() / 1000, startPosition, batchSize, count, (double)(startPosition+batchSize)/(stopWatch.getSplitTime() / 1000));
LOG.debug("Reading Accessions. Stopwatch={}s {}+{} of {}. Processing at {} accessions/s", stopWatch.getSplitTime() / 1000, startPosition, batchSize, count,
(double) (startPosition + batchSize) / (stopWatch.getSplitTime() / 1000));
results = query.fetch();
asyncUpdate(results, action);
// Next page
query.offset(startPosition += results.size());
// Clear anything cached in the entity manager
em.clear();
} while (results.size() > 0);
......
......@@ -24,11 +24,12 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.validation.ConstraintViolationException;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.catalog.model.dataset.Dataset;
import org.genesys.catalog.model.dataset.DatasetCreator;
......@@ -61,8 +62,6 @@ import org.springframework.security.test.context.support.WithMockUser;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import javax.validation.ConstraintViolationException;
@WithMockUser(username = "admin", password = "admin", roles = "ADMINISTRATOR")
public class DatasetServiceTest extends AbstractDatasetServiceTest {
......@@ -817,7 +816,7 @@ public class DatasetServiceTest extends AbstractDatasetServiceTest {
}
@Test
public void testLateAccessionRegistration() {
public void testLateAccessionRegistration() throws InterruptedException {
final Set<AccessionRef> ids1 = new HashSet<>();
ids1.add(makeAccessionIdentifier(TEST_INSTCODE, "A1", "Musa", null));
ids1.add(makeAccessionIdentifier(TEST_INSTCODE, "A2", "Musa", null));
......@@ -849,6 +848,8 @@ public class DatasetServiceTest extends AbstractDatasetServiceTest {
final Accession accession2 = upsertAccession(TEST_INSTCODE, "A8", "Musa");
datasetService.rematchDatasetAccessions(dataset1);
Thread.sleep(5000);
datasets1 = datasetService.listByAccession(accession2);
assertThat(datasets1.size(), is(1));
assertThat(datasets1.get(0).getUuid(), is(dataset1.getUuid()));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment