Commit cf6c1aad authored by Matija Obreza

Enhanced support for full-text queries

- Prevent downloads when a full-text (ES) query is used
- IFullTextFilter marks ES types
parent 09c5de62
...@@ -21,15 +21,14 @@ import java.util.HashSet; ...@@ -21,15 +21,14 @@ import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.StringFilter; import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter; import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.dataset.Dataset; import org.genesys.catalog.model.dataset.Dataset;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder; import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate; import com.querydsl.core.types.Predicate;
import org.genesys2.server.model.PublishState;
/** /**
* The Class DatasetFilter. * The Class DatasetFilter.
...@@ -37,7 +36,7 @@ import org.genesys2.server.model.PublishState; ...@@ -37,7 +36,7 @@ import org.genesys2.server.model.PublishState;
* @author Andrey Lugovskoy * @author Andrey Lugovskoy
* @author Matija Obreza * @author Matija Obreza
*/ */
public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> { public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> implements IFullTextFilter {
/** Any text. */ /** Any text. */
public String _text; public String _text;
...@@ -113,29 +112,29 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> { ...@@ -113,29 +112,29 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
and.and(dataset.rights.in(rights)); and.and(dataset.rights.in(rights));
} }
if (StringUtils.isNotBlank(_text)) { // if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/ // /*@formatter:off*/
and.andAnyOf( // and.andAnyOf(
ArrayUtils.addAll( // ArrayUtils.addAll(
FilterHelpers.equalsAny(_text, // FilterHelpers.equalsAny(_text,
dataset.versionTag, // dataset.versionTag,
dataset.owner.shortName, // dataset.owner.shortName,
dataset.crops.any(), // dataset.crops.any(),
dataset.descriptors.any().crop, // dataset.descriptors.any().crop,
dataset.accessionRefs.any().genus, dataset.accessionRefs.any().instCode, dataset.accessionRefs.any().acceNumb // dataset.accessionRefs.any().genus, dataset.accessionRefs.any().instCode, dataset.accessionRefs.any().acceNumb
), // ),
FilterHelpers.containsAll(_text, // FilterHelpers.containsAll(_text,
dataset.title, dataset.description, // dataset.title, dataset.description,
dataset.creators.any().fullName, // dataset.creators.any().fullName,
dataset.locations.any().verbatimLocality, // dataset.locations.any().verbatimLocality,
dataset.repositoryFiles.any().title, dataset.repositoryFiles.any().originalFilename, // dataset.repositoryFiles.any().title, dataset.repositoryFiles.any().originalFilename,
dataset.owner.name, // dataset.owner.name,
dataset.descriptors.any().title // dataset.descriptors.any().title
) // )
) // )
); // );
/*@formatter:on*/ // /*@formatter:on*/
} // }
return and; return and;
} }
...@@ -149,4 +148,9 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> { ...@@ -149,4 +148,9 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
} }
return this; return this;
} }
/**
 * Returns the free-text term stored in the {@code _text} field.
 *
 * @return the current value of {@code _text}
 */
@Override
public String get_text() {
    return this._text;
}
} }
...@@ -20,14 +20,13 @@ import static org.genesys.catalog.model.traits.QDescriptor.descriptor; ...@@ -20,14 +20,13 @@ import static org.genesys.catalog.model.traits.QDescriptor.descriptor;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.NumberFilter; import org.genesys.blocks.model.filters.NumberFilter;
import org.genesys.blocks.model.filters.StringFilter; import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter; import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.traits.Descriptor; import org.genesys.catalog.model.traits.Descriptor;
import org.genesys.catalog.model.traits.QDescriptor; import org.genesys.catalog.model.traits.QDescriptor;
import org.genesys2.server.model.PublishState; import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder; import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate; import com.querydsl.core.types.Predicate;
...@@ -38,7 +37,7 @@ import com.querydsl.core.types.Predicate; ...@@ -38,7 +37,7 @@ import com.querydsl.core.types.Predicate;
* @author Andrey Lugovskoy * @author Andrey Lugovskoy
* @author Matija Obreza * @author Matija Obreza
*/ */
public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> { public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> implements IFullTextFilter {
/** Any text. */ /** Any text. */
public String _text; public String _text;
...@@ -157,24 +156,24 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript ...@@ -157,24 +156,24 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript
} }
} }
if (StringUtils.isNotBlank(_text)) { // if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/ // /*@formatter:off*/
and.andAnyOf( // and.andAnyOf(
ArrayUtils.addAll( // ArrayUtils.addAll(
FilterHelpers.equalsAny(_text, // FilterHelpers.equalsAny(_text,
descriptorPath.crop, descriptorPath.versionTag, descriptorPath.publisher, // descriptorPath.crop, descriptorPath.versionTag, descriptorPath.publisher,
descriptorPath.owner.shortName, // descriptorPath.owner.shortName,
descriptorPath.descriptorLists.any().publisher, descriptorPath.descriptorLists.any().versionTag, descriptorPath.descriptorLists.any().crop // descriptorPath.descriptorLists.any().publisher, descriptorPath.descriptorLists.any().versionTag, descriptorPath.descriptorLists.any().crop
), // ),
FilterHelpers.containsAll(_text, // FilterHelpers.containsAll(_text,
descriptorPath.title, descriptorPath.description, descriptorPath.bibliographicCitation, // descriptorPath.title, descriptorPath.description, descriptorPath.bibliographicCitation,
descriptorPath.owner.name, // descriptorPath.owner.name,
descriptorPath.descriptorLists.any().title // descriptorPath.descriptorLists.any().title
) // )
) // )
); // );
/*@formatter:on*/ // /*@formatter:on*/
} // }
return and; return and;
} }
...@@ -197,4 +196,8 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript ...@@ -197,4 +196,8 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript
} }
} }
/**
 * Returns the free-text term stored in the {@code _text} field.
 *
 * @return the current value of {@code _text}
 */
@Override
public String get_text() {
    return this._text;
}
} }
...@@ -20,13 +20,12 @@ import static org.genesys.catalog.model.traits.QDescriptorList.descriptorList; ...@@ -20,13 +20,12 @@ import static org.genesys.catalog.model.traits.QDescriptorList.descriptorList;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.StringFilter; import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter; import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.traits.DescriptorList; import org.genesys.catalog.model.traits.DescriptorList;
import org.genesys.catalog.model.traits.QDescriptorList; import org.genesys.catalog.model.traits.QDescriptorList;
import org.genesys2.server.model.PublishState; import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder; import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate; import com.querydsl.core.types.Predicate;
...@@ -37,7 +36,7 @@ import com.querydsl.core.types.Predicate; ...@@ -37,7 +36,7 @@ import com.querydsl.core.types.Predicate;
* @author Andrey Lugovskoy * @author Andrey Lugovskoy
* @author Matija Obreza * @author Matija Obreza
*/ */
public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> { public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> implements IFullTextFilter {
/** Any text. */ /** Any text. */
public String _text; public String _text;
...@@ -98,24 +97,24 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, ...@@ -98,24 +97,24 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter,
and.and(owner.buildQuery(descriptorList.owner)); and.and(owner.buildQuery(descriptorList.owner));
} }
if (StringUtils.isNotBlank(_text)) { // if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/ // /*@formatter:off*/
and.andAnyOf( // and.andAnyOf(
ArrayUtils.addAll( // ArrayUtils.addAll(
FilterHelpers.equalsAny(_text, // FilterHelpers.equalsAny(_text,
descriptorList.crop, descriptorList.versionTag, descriptorList.publisher, // descriptorList.crop, descriptorList.versionTag, descriptorList.publisher,
descriptorList.owner.shortName, // descriptorList.owner.shortName,
descriptorList.descriptors.any().crop, descriptorList.descriptors.any().versionTag, descriptorList.descriptors.any().publisher // descriptorList.descriptors.any().crop, descriptorList.descriptors.any().versionTag, descriptorList.descriptors.any().publisher
), // ),
FilterHelpers.containsAll(_text, // FilterHelpers.containsAll(_text,
descriptorList.title, descriptorList.description, descriptorList.bibliographicCitation, // descriptorList.title, descriptorList.description, descriptorList.bibliographicCitation,
descriptorList.owner.name, // descriptorList.owner.name,
descriptorList.descriptors.any().title // descriptorList.descriptors.any().title
) // )
) // )
); // );
/*@formatter:on*/ // /*@formatter:on*/
} // }
return and; return and;
} }
...@@ -129,4 +128,9 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, ...@@ -129,4 +128,9 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter,
} }
return this; return this;
} }
/**
 * Returns the free-text term stored in the {@code _text} field.
 *
 * @return the current value of {@code _text}
 */
@Override
public String get_text() {
    return this._text;
}
} }
...@@ -185,15 +185,19 @@ public interface AccessionService { ...@@ -185,15 +185,19 @@ public interface AccessionService {
* @return the list of processed accessions * @return the list of processed accessions
*/ */
List<Accession> processAccessions(List<Long> accessionIds, IAccessionBatchAction action); List<Accession> processAccessions(List<Long> accessionIds, IAccessionBatchAction action);
public static interface IAccessionBatchAction { public static interface IBatchAction<T> {
/** /**
* Run action on batch of Accessions * Run action on batch of Accessions
* *
* @param a the accession * @param a the accession
* @return must return the resulting {@link Accession} * @return must return the resulting {@link Accession}
*/ */
List<Accession> apply(List<Accession> a) throws Exception; List<T> apply(List<T> a) throws Exception;
}
public static interface IAccessionBatchAction extends IBatchAction<Accession> {
} }
class AccessionDetails { class AccessionDetails {
......
...@@ -24,10 +24,13 @@ import java.util.Set; ...@@ -24,10 +24,13 @@ import java.util.Set;
import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilder;
import org.genesys.blocks.model.BasicModel; import org.genesys.blocks.model.BasicModel;
import org.genesys.blocks.model.filters.BasicModelFilter; import org.genesys.blocks.model.filters.BasicModelFilter;
import org.genesys2.server.service.AccessionService.IBatchAction;
import org.genesys2.server.service.impl.SearchException; import org.genesys2.server.service.impl.SearchException;
import org.springframework.data.domain.Page; import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable; import org.springframework.data.domain.Pageable;
import com.querydsl.core.types.Predicate;
/** /**
* The Interface ElasticsearchService. * The Interface ElasticsearchService.
*/ */
...@@ -222,4 +225,8 @@ public interface ElasticsearchService { ...@@ -222,4 +225,8 @@ public interface ElasticsearchService {
return sr; return sr;
} }
} }
List<Double[]> distinctCoordinates(Predicate filt, String _text);
<T extends BasicModel> void process(Class<T> clazz, BasicModelFilter<?, ?> filter, IBatchAction<T> action, Long maxSize) throws Exception;
} }
...@@ -42,7 +42,7 @@ import com.querydsl.jpa.JPQLQuery; ...@@ -42,7 +42,7 @@ import com.querydsl.jpa.JPQLQuery;
/** /**
* Filters for {@link Accession}. * Filters for {@link Accession}.
*/ */
public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> implements Serializable { public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> implements Serializable, IFullTextFilter {
private static final long serialVersionUID = -1441103961567816877L; private static final long serialVersionUID = -1441103961567816877L;
...@@ -407,4 +407,7 @@ public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> ...@@ -407,4 +407,7 @@ public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession>
return doi; return doi;
} }
/**
 * Returns the free-text term stored in the {@code _text} field.
 *
 * @return the current value of {@code _text}
 */
@Override
public String get_text() {
    return _text;
}
} }
/*
* Copyright 2019 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys2.server.service.filter;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
 * Marks filters that use full-text search with ES and exposes their free-text term.
 */
public interface IFullTextFilter {

    /**
     * Provides the free-text term carried by the filter.
     *
     * @return the text
     */
    String get_text();

    /**
     * Tells whether this filter requires full-text search.
     *
     * @return {@code true} when {@link #get_text()} is not blank, {@code false} otherwise
     */
    @JsonIgnore
    default boolean isFulltextQuery() {
        final String text = get_text();
        return !StringUtils.isBlank(text);
    }
}
...@@ -270,8 +270,8 @@ public class AccessionServiceImpl implements AccessionService { ...@@ -270,8 +270,8 @@ public class AccessionServiceImpl implements AccessionService {
public long countAccessions(AccessionFilter filter) { public long countAccessions(AccessionFilter filter) {
long total = elasticsearchService.count(Accession.class, filter); long total = elasticsearchService.count(Accession.class, filter);
if (total < 10000) { if (total < 10000 && StringUtils.isBlank(filter._text)) {
// If total is below 10K, use actual count // If total is below 10K and no full-text query, use actual count
total = accessionRepository.count(filter.buildPredicate()); total = accessionRepository.count(filter.buildPredicate());
} }
...@@ -491,6 +491,11 @@ public class AccessionServiceImpl implements AccessionService { ...@@ -491,6 +491,11 @@ public class AccessionServiceImpl implements AccessionService {
@Override @Override
public Number[][] getGeoBounds(AccessionFilter filter) { public Number[][] getGeoBounds(AccessionFilter filter) {
if (filter.isFulltextQuery()) {
return AccessionService.DEFAULT_GEOBOUNDS;
}
final QAccession accession = QAccession.accession; final QAccession accession = QAccession.accession;
JPAQuery<Tuple> query = jpaQueryFactory.selectFrom(accession).select(accession.accessionId.geo.latitude.min(), accession.accessionId.geo.longitude.max(), JPAQuery<Tuple> query = jpaQueryFactory.selectFrom(accession).select(accession.accessionId.geo.latitude.min(), accession.accessionId.geo.longitude.max(),
......
...@@ -198,6 +198,11 @@ public class DownloadServiceImpl implements DownloadService { ...@@ -198,6 +198,11 @@ public class DownloadServiceImpl implements DownloadService {
@Override @Override
public void writeXlsxMCPD(AccessionFilter filter, OutputStream outputStream) throws IOException { public void writeXlsxMCPD(AccessionFilter filter, OutputStream outputStream) throws IOException {
if (StringUtils.isNotBlank(filter._text)) {
throw new IOException("Cannot download MCPD when using full-text queries.");
}
final String dataSource = baseUrl + "/explore?filter=" + filter.toString(); final String dataSource = baseUrl + "/explore?filter=" + filter.toString();
writeXlsxMCPD(filter.buildPredicate(), outputStream, filter.toString(), dataSource); writeXlsxMCPD(filter.buildPredicate(), outputStream, filter.toString(), dataSource);
} }
...@@ -768,6 +773,11 @@ public class DownloadServiceImpl implements DownloadService { ...@@ -768,6 +773,11 @@ public class DownloadServiceImpl implements DownloadService {
@Override @Override
public void writeXlsxPDCI(final AccessionFilter filter, final OutputStream outputStream) throws IOException { public void writeXlsxPDCI(final AccessionFilter filter, final OutputStream outputStream) throws IOException {
if (StringUtils.isNotBlank(filter._text)) {
    // PDCI downloads are refused for full-text queries; reject before the template is loaded.
    throw new IOException("Cannot download PDCI when using full-text queries.");
}
XSSFWorkbook template = new XSSFWorkbook(getClass().getResourceAsStream("/template/download/PDCI.xlsx")); XSSFWorkbook template = new XSSFWorkbook(getClass().getResourceAsStream("/template/download/PDCI.xlsx"));
// keep 1000 rows in memory, exceeding rows will be flushed to disk // keep 1000 rows in memory, exceeding rows will be flushed to disk
......
...@@ -27,6 +27,7 @@ import java.util.stream.Collectors; ...@@ -27,6 +27,7 @@ import java.util.stream.Collectors;
import javax.sql.DataSource; import javax.sql.DataSource;
import org.apache.commons.lang3.StringUtils;
import org.genesys2.server.exception.MaxPageLimitException; import org.genesys2.server.exception.MaxPageLimitException;
import org.genesys2.server.model.elastic.AccessionDetails; import org.genesys2.server.model.elastic.AccessionDetails;
import org.genesys2.server.model.genesys.Accession; import org.genesys2.server.model.genesys.Accession;
...@@ -474,14 +475,19 @@ public class GenesysFilterServiceImpl implements GenesysFilterService { ...@@ -474,14 +475,19 @@ public class GenesysFilterServiceImpl implements GenesysFilterService {
filt.and(accessionGeo.latitude.between(latS - zoom * diffLat * .2, latN + zoom * diffLat * .2)); filt.and(accessionGeo.latitude.between(latS - zoom * diffLat * .2, latN + zoom * diffLat * .2));
} }
query.where(filt); query.where(filt);
if (StringUtils.isNotBlank(filter._text)) {
return elasticsearchService.distinctCoordinates(filt, filter._text);
} else {
List<Tuple> results = query.fetch();
return results.stream()
.map(item -> new Double[]{
item.get(accessionGeo.longitude),
item.get(accessionGeo.latitude),
}).collect(Collectors.toList());
}
List<Tuple> results = query.fetch();
return results.stream()
.map(item -> new Double[]{
item.get(accessionGeo.longitude),
item.get(accessionGeo.latitude),
}).collect(Collectors.toList());
} }
@Override @Override
......
...@@ -41,6 +41,7 @@ import java.util.zip.ZipOutputStream; ...@@ -41,6 +41,7 @@ import java.util.zip.ZipOutputStream;
import javax.persistence.EntityManager; import javax.persistence.EntityManager;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.security.SecurityContextUtil; import org.genesys.blocks.security.SecurityContextUtil;
import org.genesys.blocks.security.model.AclSid; import org.genesys.blocks.security.model.AclSid;
import org.genesys.blocks.security.service.CustomAclService; import org.genesys.blocks.security.service.CustomAclService;
...@@ -1051,6 +1052,11 @@ public class GenesysServiceImpl implements GenesysService, DatasetService { ...@@ -1051,6 +1052,11 @@ public class GenesysServiceImpl implements GenesysService, DatasetService {
@Override @Override
// TODO FIXME Need proper term URLs // TODO FIXME Need proper term URLs
public void writeAccessions(final AccessionFilter filter, final OutputStream outputStream) throws Exception { public void writeAccessions(final AccessionFilter filter, final OutputStream outputStream) throws Exception {
if (StringUtils.isNotBlank(filter._text)) {
throw new IOException("Cannot download DWC-A when using full-text queries.");
}
// UTF8 is used for encoding entry names // UTF8 is used for encoding entry names
final ZipOutputStream zos = new ZipOutputStream(outputStream); final ZipOutputStream zos = new ZipOutputStream(outputStream);
zos.setComment("Genesys Accessions filter=" + filter); zos.setComment("Genesys Accessions filter=" + filter);
......
...@@ -25,6 +25,7 @@ import org.genesys2.server.model.genesys.Accession; ...@@ -25,6 +25,7 @@ import org.genesys2.server.model.genesys.Accession;
import org.genesys2.server.model.genesys.QAccession; import org.genesys2.server.model.genesys.QAccession;
import org.genesys2.server.persistence.AccessionRepository; import org.genesys2.server.persistence.AccessionRepository;
import org.genesys2.server.service.AccessionService; import org.genesys2.server.service.AccessionService;
import org.genesys2.server.service.ElasticsearchService;
import org.genesys2.server.service.AccessionService.IAccessionBatchAction; import org.genesys2.server.service.AccessionService.IAccessionBatchAction;
import org.genesys2.server.service.filter.AccessionFilter; import org.genesys2.server.service.filter.AccessionFilter;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -64,14 +65,25 @@ public class AccessionProcessor { ...@@ -64,14 +65,25 @@ public class AccessionProcessor {
/// Size of database batch scan for IDs /// Size of database batch scan for IDs
private int batchSize = 1000; private int batchSize = 1000;
@Autowired
private ElasticsearchService elasticSearchService;
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW) @Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
public void process(AccessionFilter filter, IAccessionBatchAction action) throws Exception { public void process(AccessionFilter filter, IAccessionBatchAction action) throws Exception {
process(filter.buildPredicate(), action, null); if (filter.isFulltextQuery()) {
processEs(filter, action, null);
} else {
process(filter.buildPredicate(), action, null);
}