Commit 36cffdf9 authored by Matija Obreza

Merge branch '439-use-es-for-text-queries' into 'master'

Resolve "Use ES for text queries"

Closes #439

See merge request genesys-pgr/genesys-server!392
parents 134b9d79 029ab3b4
......@@ -21,15 +21,14 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.dataset.Dataset;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
import org.genesys2.server.model.PublishState;
/**
* The Class DatasetFilter.
......@@ -37,7 +36,7 @@ import org.genesys2.server.model.PublishState;
* @author Andrey Lugovskoy
* @author Matija Obreza
*/
public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> implements IFullTextFilter {
/** Any text. */
public String _text;
......@@ -113,29 +112,29 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
and.and(dataset.rights.in(rights));
}
if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/
and.andAnyOf(
ArrayUtils.addAll(
FilterHelpers.equalsAny(_text,
dataset.versionTag,
dataset.owner.shortName,
dataset.crops.any(),
dataset.descriptors.any().crop,
dataset.accessionRefs.any().genus, dataset.accessionRefs.any().instCode, dataset.accessionRefs.any().acceNumb
),
FilterHelpers.containsAll(_text,
dataset.title, dataset.description,
dataset.creators.any().fullName,
dataset.locations.any().verbatimLocality,
dataset.repositoryFiles.any().title, dataset.repositoryFiles.any().originalFilename,
dataset.owner.name,
dataset.descriptors.any().title
)
)
);
/*@formatter:on*/
}
// if (StringUtils.isNotBlank(_text)) {
// /*@formatter:off*/
// and.andAnyOf(
// ArrayUtils.addAll(
// FilterHelpers.equalsAny(_text,
// dataset.versionTag,
// dataset.owner.shortName,
// dataset.crops.any(),
// dataset.descriptors.any().crop,
// dataset.accessionRefs.any().genus, dataset.accessionRefs.any().instCode, dataset.accessionRefs.any().acceNumb
// ),
// FilterHelpers.containsAll(_text,
// dataset.title, dataset.description,
// dataset.creators.any().fullName,
// dataset.locations.any().verbatimLocality,
// dataset.repositoryFiles.any().title, dataset.repositoryFiles.any().originalFilename,
// dataset.owner.name,
// dataset.descriptors.any().title
// )
// )
// );
// /*@formatter:on*/
// }
return and;
}
......@@ -149,4 +148,9 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
}
return this;
}
/**
 * Exposes the free-text criterion of this filter.
 *
 * @return the current value of the {@code _text} field
 */
@Override
public String get_text() {
	return this._text;
}
}
......@@ -20,14 +20,13 @@ import static org.genesys.catalog.model.traits.QDescriptor.descriptor;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.NumberFilter;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.traits.Descriptor;
import org.genesys.catalog.model.traits.QDescriptor;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
......@@ -38,7 +37,7 @@ import com.querydsl.core.types.Predicate;
* @author Andrey Lugovskoy
* @author Matija Obreza
*/
public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> {
public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> implements IFullTextFilter {
/** Any text. */
public String _text;
......@@ -157,24 +156,24 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript
}
}
if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/
and.andAnyOf(
ArrayUtils.addAll(
FilterHelpers.equalsAny(_text,
descriptorPath.crop, descriptorPath.versionTag, descriptorPath.publisher,
descriptorPath.owner.shortName,
descriptorPath.descriptorLists.any().publisher, descriptorPath.descriptorLists.any().versionTag, descriptorPath.descriptorLists.any().crop
),
FilterHelpers.containsAll(_text,
descriptorPath.title, descriptorPath.description, descriptorPath.bibliographicCitation,
descriptorPath.owner.name,
descriptorPath.descriptorLists.any().title
)
)
);
/*@formatter:on*/
}
// if (StringUtils.isNotBlank(_text)) {
// /*@formatter:off*/
// and.andAnyOf(
// ArrayUtils.addAll(
// FilterHelpers.equalsAny(_text,
// descriptorPath.crop, descriptorPath.versionTag, descriptorPath.publisher,
// descriptorPath.owner.shortName,
// descriptorPath.descriptorLists.any().publisher, descriptorPath.descriptorLists.any().versionTag, descriptorPath.descriptorLists.any().crop
// ),
// FilterHelpers.containsAll(_text,
// descriptorPath.title, descriptorPath.description, descriptorPath.bibliographicCitation,
// descriptorPath.owner.name,
// descriptorPath.descriptorLists.any().title
// )
// )
// );
// /*@formatter:on*/
// }
return and;
}
......@@ -197,4 +196,8 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript
}
}
/**
 * Exposes the free-text criterion of this filter.
 *
 * @return the current value of the {@code _text} field
 */
@Override
public String get_text() {
	return this._text;
}
}
......@@ -20,13 +20,12 @@ import static org.genesys.catalog.model.traits.QDescriptorList.descriptorList;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.traits.DescriptorList;
import org.genesys.catalog.model.traits.QDescriptorList;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
......@@ -37,7 +36,7 @@ import com.querydsl.core.types.Predicate;
* @author Andrey Lugovskoy
* @author Matija Obreza
*/
public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> {
public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> implements IFullTextFilter {
/** Any text. */
public String _text;
......@@ -98,24 +97,24 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter,
and.and(owner.buildQuery(descriptorList.owner));
}
if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/
and.andAnyOf(
ArrayUtils.addAll(
FilterHelpers.equalsAny(_text,
descriptorList.crop, descriptorList.versionTag, descriptorList.publisher,
descriptorList.owner.shortName,
descriptorList.descriptors.any().crop, descriptorList.descriptors.any().versionTag, descriptorList.descriptors.any().publisher
),
FilterHelpers.containsAll(_text,
descriptorList.title, descriptorList.description, descriptorList.bibliographicCitation,
descriptorList.owner.name,
descriptorList.descriptors.any().title
)
)
);
/*@formatter:on*/
}
// if (StringUtils.isNotBlank(_text)) {
// /*@formatter:off*/
// and.andAnyOf(
// ArrayUtils.addAll(
// FilterHelpers.equalsAny(_text,
// descriptorList.crop, descriptorList.versionTag, descriptorList.publisher,
// descriptorList.owner.shortName,
// descriptorList.descriptors.any().crop, descriptorList.descriptors.any().versionTag, descriptorList.descriptors.any().publisher
// ),
// FilterHelpers.containsAll(_text,
// descriptorList.title, descriptorList.description, descriptorList.bibliographicCitation,
// descriptorList.owner.name,
// descriptorList.descriptors.any().title
// )
// )
// );
// /*@formatter:on*/
// }
return and;
}
......@@ -129,4 +128,9 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter,
}
return this;
}
/**
 * Exposes the free-text criterion of this filter.
 *
 * @return the current value of the {@code _text} field
 */
@Override
public String get_text() {
	return this._text;
}
}
......@@ -46,6 +46,7 @@ import org.genesys2.server.exception.NotFoundElement;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.model.UserRole;
import org.genesys2.server.service.DownloadService;
import org.genesys2.server.service.ElasticsearchService;
import org.genesys2.util.JPAUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -102,6 +103,9 @@ public class DescriptorServiceImpl implements DescriptorService {
@Autowired
private VersionManager versionManager;
@Autowired
private ElasticsearchService elasticsearchService;
/**
* {@inheritDoc}
*/
......@@ -301,8 +305,12 @@ public class DescriptorServiceImpl implements DescriptorService {
@Override
public Page<Descriptor> listDescriptors(final DescriptorFilter descriptorFilter, final Pageable page) {
Pageable markdownSortPageRequest = JPAUtils.toMarkdownSort(page, "title");
Page<Descriptor> res = descriptorRepository.findAll(new BooleanBuilder().and(descriptorFilter.buildPredicate()).and(QDescriptor.descriptor.state.in(PublishState.PUBLISHED)), markdownSortPageRequest);
return new PageImpl<>(res.getContent(), page, res.getTotalElements());
if (descriptorFilter.isFulltextQuery()) {
return elasticsearchService.findAll(Descriptor.class, descriptorFilter, descriptorFilter._text, markdownSortPageRequest);
} else {
return descriptorRepository.findAll(new BooleanBuilder().and(descriptorFilter.buildPredicate()).and(QDescriptor.descriptor.state.in(PublishState.PUBLISHED)), markdownSortPageRequest);
}
}
/**
......
......@@ -185,15 +185,19 @@ public interface AccessionService {
* @return the list of processed accessions
*/
List<Accession> processAccessions(List<Long> accessionIds, IAccessionBatchAction action);
public static interface IAccessionBatchAction {
public static interface IBatchAction<T> {
/**
* Run action on a batch of items
*
* @param a the batch of items to process
* @return must return the resulting list of items
*/
List<Accession> apply(List<Accession> a) throws Exception;
List<T> apply(List<T> a) throws Exception;
}
public static interface IAccessionBatchAction extends IBatchAction<Accession> {
}
class AccessionDetails {
......
......@@ -24,7 +24,12 @@ import java.util.Set;
import org.elasticsearch.index.query.QueryBuilder;
import org.genesys.blocks.model.BasicModel;
import org.genesys.blocks.model.filters.BasicModelFilter;
import org.genesys2.server.service.AccessionService.IBatchAction;
import org.genesys2.server.service.impl.SearchException;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import com.querydsl.core.types.Predicate;
/**
* The Interface ElasticsearchService.
......@@ -199,6 +204,8 @@ public interface ElasticsearchService {
<T extends BasicModel> List<T> find(Class<T> clazz, BasicModelFilter<?, ?> filter);
<T extends BasicModel> Page<T> findAll(Class<T> clazz, BasicModelFilter<?, ?> filter, String text, Pageable page);
/**
* Wrapper for search results
*/
......@@ -218,4 +225,18 @@ public interface ElasticsearchService {
return sr;
}
}
List<Double[]> distinctCoordinates(Predicate filt, String _text);
<T extends BasicModel> void process(Class<T> clazz, BasicModelFilter<?, ?> filter, IBatchAction<T> action, Long maxSize) throws Exception;
/**
* Wait until X records match specified filter in ES.
*
* @param clazz
* @param filter
* @param mustHaveCount
* @throws InterruptedException
*/
void waitForCount(Class<? extends BasicModel> clazz, BasicModelFilter<?, ?> filter, int mustHaveCount) throws InterruptedException;
}
......@@ -42,10 +42,13 @@ import com.querydsl.jpa.JPQLQuery;
/**
* Filters for {@link Accession}.
*/
public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> implements Serializable {
public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> implements Serializable, IFullTextFilter {
private static final long serialVersionUID = -1441103961567816877L;
/** Any text. */
public String _text;
/** The historic. */
public Boolean historic;
......@@ -404,4 +407,7 @@ public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession>
return doi;
}
/**
 * Exposes the free-text criterion of this filter.
 *
 * @return the current value of the {@code _text} field
 */
@Override
public String get_text() {
	return _text;
}
}
/*
* Copyright 2019 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys2.server.service.filter;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
 * Contract for filters that carry a free-text term and are resolved with
 * full-text search in ES.
 */
public interface IFullTextFilter {

	/**
	 * Returns the free-text term held by the filter.
	 *
	 * @return the text
	 */
	String get_text();

	/**
	 * Tells whether this filter needs a full-text search.
	 *
	 * @return {@code true} when {@link #get_text()} is not blank
	 */
	@JsonIgnore
	default boolean isFulltextQuery() {
		final String text = get_text();
		return !StringUtils.isBlank(text);
	}
}
......@@ -28,8 +28,6 @@ import java.util.UUID;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.collections.CollectionUtils;
import org.genesys.blocks.auditlog.service.AuditTrailService;
import org.genesys.catalog.model.dataset.Dataset;
......@@ -69,6 +67,8 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.Tuple;
import com.querydsl.jpa.impl.JPAQuery;
......@@ -269,8 +269,8 @@ public class AccessionServiceImpl implements AccessionService {
public long countAccessions(AccessionFilter filter) {
long total = elasticsearchService.count(Accession.class, filter);
if (total < 10000) {
// If total is below 10K, use actual count
if (total < 10000 && ! filter.isFulltextQuery()) {
// If total is below 10K and no full-text query, use actual count
total = accessionRepository.count(filter.buildPredicate());
}
......@@ -368,6 +368,10 @@ public class AccessionServiceImpl implements AccessionService {
@Override
// @Cacheable(value = "apiResponses.accessionApi1.list", unless = "#result == null", keyGenerator = "shortFilterKeyGenerator")
public Page<Accession> list(AccessionFilter filter, Pageable page) {
if (filter.isFulltextQuery()) {
return elasticsearchService.findAll(Accession.class, filter, filter._text, page);
}
List<Accession> content = accessionRepository.findAll(filter, page);
long total = countAccessions(filter);
......@@ -486,6 +490,11 @@ public class AccessionServiceImpl implements AccessionService {
@Override
public Number[][] getGeoBounds(AccessionFilter filter) {
if (filter.isFulltextQuery()) {
return AccessionService.DEFAULT_GEOBOUNDS;
}
final QAccession accession = QAccession.accession;
JPAQuery<Tuple> query = jpaQueryFactory.selectFrom(accession).select(accession.accessionId.geo.latitude.min(), accession.accessionId.geo.longitude.max(),
......
......@@ -198,6 +198,11 @@ public class DownloadServiceImpl implements DownloadService {
@Override
public void writeXlsxMCPD(AccessionFilter filter, OutputStream outputStream) throws IOException {
if (filter.isFulltextQuery()) {
throw new IOException("Cannot download MCPD when using full-text queries.");
}
final String dataSource = baseUrl + "/explore?filter=" + filter.toString();
writeXlsxMCPD(filter.buildPredicate(), outputStream, filter.toString(), dataSource);
}
......@@ -768,6 +773,11 @@ public class DownloadServiceImpl implements DownloadService {
@Override
public void writeXlsxPDCI(final AccessionFilter filter, final OutputStream outputStream) throws IOException {
// Full-text filters are rejected for the PDCI download; report the correct export type.
if (filter.isFulltextQuery()) {
	throw new IOException("Cannot download PDCI when using full-text queries.");
}
XSSFWorkbook template = new XSSFWorkbook(getClass().getResourceAsStream("/template/download/PDCI.xlsx"));
// keep 1000 rows in memory, exceeding rows will be flushed to disk
......
......@@ -474,14 +474,19 @@ public class GenesysFilterServiceImpl implements GenesysFilterService {
filt.and(accessionGeo.latitude.between(latS - zoom * diffLat * .2, latN + zoom * diffLat * .2));
}
query.where(filt);
if (filter.isFulltextQuery()) {
return elasticsearchService.distinctCoordinates(filt, filter._text);
} else {
List<Tuple> results = query.fetch();
return results.stream()
.map(item -> new Double[]{
item.get(accessionGeo.longitude),
item.get(accessionGeo.latitude),
}).collect(Collectors.toList());
}
List<Tuple> results = query.fetch();
return results.stream()
.map(item -> new Double[]{
item.get(accessionGeo.longitude),
item.get(accessionGeo.latitude),
}).collect(Collectors.toList());
}
@Override
......
......@@ -1051,6 +1051,11 @@ public class GenesysServiceImpl implements GenesysService, DatasetService {
@Override
// TODO FIXME Need proper term URLs
public void writeAccessions(final AccessionFilter filter, final OutputStream outputStream) throws Exception {
if (filter.isFulltextQuery()) {
throw new IOException("Cannot download DWC-A when using full-text queries.");
}
// UTF8 is used for encoding entry names
final ZipOutputStream zos = new ZipOutputStream(outputStream);
zos.setComment("Genesys Accessions filter=" + filter);
......
......@@ -25,6 +25,7 @@ import org.genesys2.server.model.genesys.Accession;
import org.genesys2.server.model.genesys.QAccession;
import org.genesys2.server.persistence.AccessionRepository;
import org.genesys2.server.service.AccessionService;
import org.genesys2.server.service.ElasticsearchService;
import org.genesys2.server.service.AccessionService.IAccessionBatchAction;
import org.genesys2.server.service.filter.AccessionFilter;
import org.slf4j.Logger;
......@@ -64,14 +65,25 @@ public class AccessionProcessor {
/// Size of database batch scan for IDs
private int batchSize = 1000;
@Autowired
private ElasticsearchService elasticSearchService;
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
public void process(AccessionFilter filter, IAccessionBatchAction action) throws Exception {
process(filter.buildPredicate(), action, null);
if (filter.isFulltextQuery()) {
processEs(filter, action, null);
} else {
process(filter.buildPredicate(), action, null);
}
}
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
public void process(AccessionFilter filter, IAccessionBatchAction action, Long maxSize) throws Exception {
process(filter.buildPredicate(), action, maxSize);
if (filter.isFulltextQuery()) {
processEs(filter, action, maxSize);
} else {
process(filter.buildPredicate(), action, maxSize);
}
}
@Transactional(readOnly = true, propagation = Propagation.REQUIRES_NEW)
......@@ -89,6 +101,10 @@ public class AccessionProcessor {
process(query, action, maxSize);
}
/**
 * Runs the batch action through {@code elasticSearchService.process} for
 * {@link Accession} records, passing the filter, action and size limit along unchanged.
 *
 * @param filter the accession filter forwarded to the Elasticsearch service
 * @param action the batch action forwarded to the Elasticsearch service
 * @param maxSize forwarded as-is; callers in this class may pass {@code null}
 * @throws Exception whatever the Elasticsearch service call throws
 */
private void processEs(AccessionFilter filter, IAccessionBatchAction action, Long maxSize) throws Exception {
elasticSearchService.process(Accession.class, filter, action, maxSize);
}
/**
* Advanced usage.
......
......@@ -22,6 +22,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import javax.validation.ConstraintViolationException;
import org.apache.commons.lang3.RandomStringUtils;
import org.genesys.catalog.model.filters.DescriptorFilter;
import org.genesys.catalog.model.traits.Descriptor;
......@@ -33,7 +35,7 @@ import org.genesys.catalog.model.vocab.VocabularyTerm;
import org.genesys2.server.exception.InvalidApiUsageException;
import org.genesys2.server.exception.NotFoundElement;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.model.impl.Crop;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.dao.ConcurrencyFailureException;
import org.springframework.dao.DataIntegrityViolationException;
......@@ -46,8 +48,6 @@ import org.springframework.data.domain.Sort;
import com.google.common.collect.Lists;
import javax.validation.ConstraintViolationException;
public class DescriptorServiceTest extends CatalogServiceTest {
private static final String DESCRIPTOR_TITLE_1 = "D1";
......@@ -649,6 +649,7 @@ public class DescriptorServiceTest extends CatalogServiceTest {
}
@Test
@Ignore // We don't have ES in this test suite. Skip it.
public void testKeywordSearch() {
descriptorRepository.save(setupDescriptor(null, "AAA CCDD", Category.PASSPORT, DataType.BOOLEAN, VERSION_1_0, PublishState.PUBLISHED));
descriptorRepository.save(setupDescriptor(null, "AAA BBEE1", Category.PASSPORT, DataType.BOOLEAN, VERSION_1_0, PublishState.PUBLISHED));
......
......@@ -107,17 +107,7 @@ public class ElasticQueryBuilderTest extends AbstractElasticServiceTest {
accessionUploader.upsertAccessions(institute, batch);
assertThat(accessionRepository.count(), is((long) batch.size()));
long count = 0;
do {
count = elasticsearchService.count(Accession.class, null);
if (count != batch.size()) {
LOG.warn("ES count of {} is {}!={}", Accession.class, count, batch.size());
Thread.sleep(1000);
} else {
List<Accession> search = elasticsearchService.find(Accession.class, null);
LOG.warn("Got: ", search);
}
} while (count != batch.size());
elasticsearchService.waitForCount(Accession.class, null, batch.size());
}
private ObjectNode makeAccessionJson(FaoInstitute institute, String acceNumb, String genus, Set<Integer> storage) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment