Commit 36cffdf9 authored by Matija Obreza's avatar Matija Obreza

Merge branch '439-use-es-for-text-queries' into 'master'

Resolve "Use ES for text queries"

Closes #439

See merge request genesys-pgr/genesys-server!392
parents 134b9d79 029ab3b4
......@@ -21,15 +21,14 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.dataset.Dataset;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
import org.genesys2.server.model.PublishState;
/**
* The Class DatasetFilter.
......@@ -37,7 +36,7 @@ import org.genesys2.server.model.PublishState;
* @author Andrey Lugovskoy
* @author Matija Obreza
*/
public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> implements IFullTextFilter {
/** Any text. */
public String _text;
......@@ -113,29 +112,29 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
and.and(dataset.rights.in(rights));
}
if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/
and.andAnyOf(
ArrayUtils.addAll(
FilterHelpers.equalsAny(_text,
dataset.versionTag,
dataset.owner.shortName,
dataset.crops.any(),
dataset.descriptors.any().crop,
dataset.accessionRefs.any().genus, dataset.accessionRefs.any().instCode, dataset.accessionRefs.any().acceNumb
),
FilterHelpers.containsAll(_text,
dataset.title, dataset.description,
dataset.creators.any().fullName,
dataset.locations.any().verbatimLocality,
dataset.repositoryFiles.any().title, dataset.repositoryFiles.any().originalFilename,
dataset.owner.name,
dataset.descriptors.any().title
)
)
);
/*@formatter:on*/
}
// if (StringUtils.isNotBlank(_text)) {
// /*@formatter:off*/
// and.andAnyOf(
// ArrayUtils.addAll(
// FilterHelpers.equalsAny(_text,
// dataset.versionTag,
// dataset.owner.shortName,
// dataset.crops.any(),
// dataset.descriptors.any().crop,
// dataset.accessionRefs.any().genus, dataset.accessionRefs.any().instCode, dataset.accessionRefs.any().acceNumb
// ),
// FilterHelpers.containsAll(_text,
// dataset.title, dataset.description,
// dataset.creators.any().fullName,
// dataset.locations.any().verbatimLocality,
// dataset.repositoryFiles.any().title, dataset.repositoryFiles.any().originalFilename,
// dataset.owner.name,
// dataset.descriptors.any().title
// )
// )
// );
// /*@formatter:on*/
// }
return and;
}
......@@ -149,4 +148,9 @@ public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> {
}
return this;
}
@Override
public String get_text() {
return _text;
}
}
......@@ -20,14 +20,13 @@ import static org.genesys.catalog.model.traits.QDescriptor.descriptor;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.NumberFilter;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.traits.Descriptor;
import org.genesys.catalog.model.traits.QDescriptor;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
......@@ -38,7 +37,7 @@ import com.querydsl.core.types.Predicate;
* @author Andrey Lugovskoy
* @author Matija Obreza
*/
public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> {
public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> implements IFullTextFilter {
/** Any text. */
public String _text;
......@@ -157,24 +156,24 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript
}
}
if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/
and.andAnyOf(
ArrayUtils.addAll(
FilterHelpers.equalsAny(_text,
descriptorPath.crop, descriptorPath.versionTag, descriptorPath.publisher,
descriptorPath.owner.shortName,
descriptorPath.descriptorLists.any().publisher, descriptorPath.descriptorLists.any().versionTag, descriptorPath.descriptorLists.any().crop
),
FilterHelpers.containsAll(_text,
descriptorPath.title, descriptorPath.description, descriptorPath.bibliographicCitation,
descriptorPath.owner.name,
descriptorPath.descriptorLists.any().title
)
)
);
/*@formatter:on*/
}
// if (StringUtils.isNotBlank(_text)) {
// /*@formatter:off*/
// and.andAnyOf(
// ArrayUtils.addAll(
// FilterHelpers.equalsAny(_text,
// descriptorPath.crop, descriptorPath.versionTag, descriptorPath.publisher,
// descriptorPath.owner.shortName,
// descriptorPath.descriptorLists.any().publisher, descriptorPath.descriptorLists.any().versionTag, descriptorPath.descriptorLists.any().crop
// ),
// FilterHelpers.containsAll(_text,
// descriptorPath.title, descriptorPath.description, descriptorPath.bibliographicCitation,
// descriptorPath.owner.name,
// descriptorPath.descriptorLists.any().title
// )
// )
// );
// /*@formatter:on*/
// }
return and;
}
......@@ -197,4 +196,8 @@ public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descript
}
}
@Override
public String get_text() {
return _text;
}
}
......@@ -20,13 +20,12 @@ import static org.genesys.catalog.model.traits.QDescriptorList.descriptorList;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.blocks.model.filters.UuidModelFilter;
import org.genesys.catalog.model.traits.DescriptorList;
import org.genesys.catalog.model.traits.QDescriptorList;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.service.filter.IFullTextFilter;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.Predicate;
......@@ -37,7 +36,7 @@ import com.querydsl.core.types.Predicate;
* @author Andrey Lugovskoy
* @author Matija Obreza
*/
public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> {
public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> implements IFullTextFilter {
/** Any text. */
public String _text;
......@@ -98,24 +97,24 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter,
and.and(owner.buildQuery(descriptorList.owner));
}
if (StringUtils.isNotBlank(_text)) {
/*@formatter:off*/
and.andAnyOf(
ArrayUtils.addAll(
FilterHelpers.equalsAny(_text,
descriptorList.crop, descriptorList.versionTag, descriptorList.publisher,
descriptorList.owner.shortName,
descriptorList.descriptors.any().crop, descriptorList.descriptors.any().versionTag, descriptorList.descriptors.any().publisher
),
FilterHelpers.containsAll(_text,
descriptorList.title, descriptorList.description, descriptorList.bibliographicCitation,
descriptorList.owner.name,
descriptorList.descriptors.any().title
)
)
);
/*@formatter:on*/
}
// if (StringUtils.isNotBlank(_text)) {
// /*@formatter:off*/
// and.andAnyOf(
// ArrayUtils.addAll(
// FilterHelpers.equalsAny(_text,
// descriptorList.crop, descriptorList.versionTag, descriptorList.publisher,
// descriptorList.owner.shortName,
// descriptorList.descriptors.any().crop, descriptorList.descriptors.any().versionTag, descriptorList.descriptors.any().publisher
// ),
// FilterHelpers.containsAll(_text,
// descriptorList.title, descriptorList.description, descriptorList.bibliographicCitation,
// descriptorList.owner.name,
// descriptorList.descriptors.any().title
// )
// )
// );
// /*@formatter:on*/
// }
return and;
}
......@@ -129,4 +128,9 @@ public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter,
}
return this;
}
@Override
public String get_text() {
return _text;
}
}
......@@ -46,6 +46,7 @@ import org.genesys2.server.exception.NotFoundElement;
import org.genesys2.server.model.PublishState;
import org.genesys2.server.model.UserRole;
import org.genesys2.server.service.DownloadService;
import org.genesys2.server.service.ElasticsearchService;
import org.genesys2.util.JPAUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -102,6 +103,9 @@ public class DescriptorServiceImpl implements DescriptorService {
@Autowired
private VersionManager versionManager;
@Autowired
private ElasticsearchService elasticsearchService;
/**
* {@inheritDoc}
*/
......@@ -301,8 +305,12 @@ public class DescriptorServiceImpl implements DescriptorService {
@Override
public Page<Descriptor> listDescriptors(final DescriptorFilter descriptorFilter, final Pageable page) {
Pageable markdownSortPageRequest = JPAUtils.toMarkdownSort(page, "title");
Page<Descriptor> res = descriptorRepository.findAll(new BooleanBuilder().and(descriptorFilter.buildPredicate()).and(QDescriptor.descriptor.state.in(PublishState.PUBLISHED)), markdownSortPageRequest);
return new PageImpl<>(res.getContent(), page, res.getTotalElements());
if (descriptorFilter.isFulltextQuery()) {
return elasticsearchService.findAll(Descriptor.class, descriptorFilter, descriptorFilter._text, markdownSortPageRequest);
} else {
return descriptorRepository.findAll(new BooleanBuilder().and(descriptorFilter.buildPredicate()).and(QDescriptor.descriptor.state.in(PublishState.PUBLISHED)), markdownSortPageRequest);
}
}
/**
......
......@@ -185,15 +185,19 @@ public interface AccessionService {
* @return the list of processed accessions
*/
List<Accession> processAccessions(List<Long> accessionIds, IAccessionBatchAction action);
public static interface IAccessionBatchAction {
public static interface IBatchAction<T> {
/**
* Run action on batch of Accessions
*
* @param a the accession
* @return must return the resulting {@link Accession}
*/
List<Accession> apply(List<Accession> a) throws Exception;
List<T> apply(List<T> a) throws Exception;
}
public static interface IAccessionBatchAction extends IBatchAction<Accession> {
}
class AccessionDetails {
......
......@@ -24,7 +24,12 @@ import java.util.Set;
import org.elasticsearch.index.query.QueryBuilder;
import org.genesys.blocks.model.BasicModel;
import org.genesys.blocks.model.filters.BasicModelFilter;
import org.genesys2.server.service.AccessionService.IBatchAction;
import org.genesys2.server.service.impl.SearchException;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import com.querydsl.core.types.Predicate;
/**
* The Interface ElasticsearchService.
......@@ -199,6 +204,8 @@ public interface ElasticsearchService {
<T extends BasicModel> List<T> find(Class<T> clazz, BasicModelFilter<?, ?> filter);
<T extends BasicModel> Page<T> findAll(Class<T> clazz, BasicModelFilter<?, ?> filter, String text, Pageable page);
/**
* Wrapper for search results
*/
......@@ -218,4 +225,18 @@ public interface ElasticsearchService {
return sr;
}
}
List<Double[]> distinctCoordinates(Predicate filt, String _text);
<T extends BasicModel> void process(Class<T> clazz, BasicModelFilter<?, ?> filter, IBatchAction<T> action, Long maxSize) throws Exception;
/**
* Wait until X records match specified filter in ES.
*
* @param clazz
* @param filter
* @param mustHaveCount
* @throws InterruptedException
*/
void waitForCount(Class<? extends BasicModel> clazz, BasicModelFilter<?, ?> filter, int mustHaveCount) throws InterruptedException;
}
......@@ -42,10 +42,13 @@ import com.querydsl.jpa.JPQLQuery;
/**
* Filters for {@link Accession}.
*/
public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> implements Serializable {
public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession> implements Serializable, IFullTextFilter {
private static final long serialVersionUID = -1441103961567816877L;
/** Any text. */
public String _text;
/** The historic. */
public Boolean historic;
......@@ -404,4 +407,7 @@ public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession>
return doi;
}
public String get_text() {
return _text;
}
}
/*
* Copyright 2019 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys2.server.service.filter;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* IFullTextFilter is used to mark filters that use full-text search with ES
*/
public interface IFullTextFilter {
/**
* Gets the text.
*
* @return the text
*/
String get_text();
/**
* Does the filter require full-text search?
*
* @return true if {@link #get_text()} is non-blank
*/
@JsonIgnore
default boolean isFulltextQuery() {
return StringUtils.isNotBlank(get_text());
}
}
......@@ -28,8 +28,6 @@ import java.util.UUID;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.collections.CollectionUtils;
import org.genesys.blocks.auditlog.service.AuditTrailService;
import org.genesys.catalog.model.dataset.Dataset;
......@@ -69,6 +67,8 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.Tuple;
import com.querydsl.jpa.impl.JPAQuery;
......@@ -269,8 +269,8 @@ public class AccessionServiceImpl implements AccessionService {
public long countAccessions(AccessionFilter filter) {
long total = elasticsearchService.count(Accession.class, filter);
if (total < 10000) {
// If total is below 10K, use actual count
if (total < 10000 && ! filter.isFulltextQuery()) {
// If total is below 10K and no full-text query, use actual count
total = accessionRepository.count(filter.buildPredicate());
}
......@@ -368,6 +368,10 @@ public class AccessionServiceImpl implements AccessionService {
@Override
// @Cacheable(value = "apiResponses.accessionApi1.list", unless = "#result == null", keyGenerator = "shortFilterKeyGenerator")
public Page<Accession> list(AccessionFilter filter, Pageable page) {
if (filter.isFulltextQuery()) {
return elasticsearchService.findAll(Accession.class, filter, filter._text, page);
}
List<Accession> content = accessionRepository.findAll(filter, page);
long total = countAccessions(filter);
......@@ -486,6 +490,11 @@ public class AccessionServiceImpl implements AccessionService {
@Override
public Number[][] getGeoBounds(AccessionFilter filter) {
if (filter.isFulltextQuery()) {
return AccessionService.DEFAULT_GEOBOUNDS;
}
final QAccession accession = QAccession.accession;
JPAQuery<Tuple> query = jpaQueryFactory.selectFrom(accession).select(accession.accessionId.geo.latitude.min(), accession.accessionId.geo.longitude.max(),
......
......@@ -198,6 +198,11 @@ public class DownloadServiceImpl implements DownloadService {
@Override
public void writeXlsxMCPD(AccessionFilter filter, OutputStream outputStream) throws IOException {
if (filter.isFulltextQuery()) {
throw new IOException("Cannot download MCPD when using full-text queries.");
}
final String dataSource = baseUrl + "/explore?filter=" + filter.toString();
writeXlsxMCPD(filter.buildPredicate(), outputStream, filter.toString(), dataSource);
}
......@@ -768,6 +773,11 @@ public class DownloadServiceImpl implements DownloadService {
@Override
public void writeXlsxPDCI(final AccessionFilter filter, final OutputStream outputStream) throws IOException {
if (filter.isFulltextQuery()) {
throw new IOException("Cannot download MCPD when using full-text queries.");
}
XSSFWorkbook template = new XSSFWorkbook(getClass().getResourceAsStream("/template/download/PDCI.xlsx"));
// keep 1000 rows in memory, exceeding rows will be flushed to disk
......
......@@ -28,6 +28,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import javax.persistence.EntityManager;
......@@ -36,10 +37,6 @@ import javax.persistence.criteria.CriteriaBuilder;
import javax.persistence.criteria.CriteriaQuery;
import javax.persistence.criteria.Root;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import com.fasterxml.jackson.module.jsonSchema.types.ObjectSchema;
import com.google.common.collect.Sets;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.ListUtils;
import org.apache.commons.collections4.MapUtils;
......@@ -48,15 +45,20 @@ import org.apache.commons.lang3.time.StopWatch;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.AliasOrIndex;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SimpleQueryStringBuilder.Operator;
import org.elasticsearch.rest.action.admin.indices.alias.delete.AliasesNotFoundException;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
......@@ -73,6 +75,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms;
import org.elasticsearch.search.aggregations.metrics.tophits.TopHits;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.genesys.blocks.model.BasicModel;
import org.genesys.blocks.model.Publishable;
......@@ -80,13 +83,19 @@ import org.genesys.blocks.model.VersionedModel;
import org.genesys.blocks.model.filters.BasicModelFilter;
import org.genesys.custom.elasticsearch.CustomMapping;
import org.genesys2.server.component.elastic.ElasticQueryBuilder;
import org.genesys2.server.model.json.Api0Constants.Accession;
import org.genesys2.server.model.json.Api1Constants;
import org.genesys2.server.service.AccessionService.IBatchAction;
import org.genesys2.server.service.ElasticsearchService;
import org.genesys2.server.service.filter.IFullTextFilter;
import org.genesys2.spring.config.ElasticsearchConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
......@@ -96,6 +105,7 @@ import org.springframework.data.elasticsearch.core.query.IndexQuery;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.data.elasticsearch.core.query.SearchQuery;
import org.springframework.data.elasticsearch.core.query.SourceFilter;
import org.springframework.data.geo.Point;
import org.springframework.data.jpa.repository.support.Querydsl;
import org.springframework.data.querydsl.SimpleEntityPathResolver;
import org.springframework.scheduling.TaskScheduler;
......@@ -104,7 +114,12 @@ import org.springframework.transaction.annotation.Transactional;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import com.fasterxml.jackson.module.jsonSchema.types.ObjectSchema;
import com.google.common.collect.Sets;
import com.querydsl.core.types.EntityPath;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.PathBuilder;
import com.querydsl.core.types.dsl.PathBuilderFactory;
import com.querydsl.jpa.JPQLQuery;
......@@ -118,6 +133,10 @@ import com.querydsl.jpa.JPQLQuery;
@Transactional(readOnly = true)
public class ElasticsearchServiceImpl implements ElasticsearchService, InitializingBean {
private static final String ACCESSION_GEO_LONGITUDE = Api1Constants.Accession.GEO + "." + Api1Constants.Geo.LONGITUDE;
private static final String ACCESSION_GEO_LATITUDE = Api1Constants.Accession.GEO + "." + Api1Constants.Geo.LATITUDE;
private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchServiceImpl.class);
private static final String INDEX_READ = "_read";
......@@ -182,6 +201,22 @@ public class ElasticsearchServiceImpl implements ElasticsearchService, Initializ
}
}
}
@Override
public void waitForCount(Class<? extends BasicModel> clazz, BasicModelFilter<?, ?> filter, int mustHaveCount) throws InterruptedException {
long count = 0;
do {
count = count(clazz, filter);
if (count != mustHaveCount) {
LOG.warn("ES count of {} is {}!={}", clazz.getName(), count, mustHaveCount);
Thread.sleep(1000);
} else {
List<?> search = find(clazz, filter);
LOG.warn("Got: ", search);
}
} while (count != mustHaveCount);
}
/**
* Makes a list of all JSON paths for indexed entity and all related types.
......@@ -749,20 +784,6 @@ public class ElasticsearchServiceImpl implements ElasticsearchService, Initializ
return termResults;
}
private QueryBuilder toEsQuery(Class<?> clazz, BasicModelFilter<?, ?> filters) {
ElasticQueryBuilder esQb = new ElasticQueryBuilder();
if (filters != null) {
filters.buildPredicate().accept(esQb, null);
}
QueryBuilder esQuery = esQb.getQuery();
try {
LOG.trace("Converted {} to\ncurl -XGET 'localhost:9200/accession_read/_search?pretty' -d '{ \"query\": \n{} }'", new ObjectMapper().writeValueAsString(filters),
esQuery);
} catch (JsonProcessingException e) {
}
return esQuery;
}
/**
* Runs TermFacet, but will automatically increase size if #otherCount is more
* than 10%
......@@ -889,4 +910,162 @@ public class ElasticsearchServiceImpl implements ElasticsearchService, Initializ
return results;
}
@Override
public <T extends BasicModel> Page<T> findAll(Class<T> clazz, BasicModelFilter<?, ?> filter, String text, Pageable page) {
String indexName = toIndexName(clazz) + INDEX_READ;
if (page.getOffset() > 100000) {
LOG.warn("Large offset={} for ES query", page.getOffset());
}
SearchRequestBuilder esQuery = client.prepareSearch(indexName).setTypes(COMMON_TYPE_NAME).setQuery(toEsQuery(clazz, filter))
.setFrom(page.getOffset()).setSize(page.getPageSize());
// for (Sort.Order order : page.getSort()) {
// esQuery.addSort(SortBuilders.fieldSort(order.getProperty()).order(SortOrder.valueOf(order.getDirection().name())));
// }
SearchResponse searchResponse = esQuery.get();
List<T> content = new ArrayList<>();
searchResponse.getHits().forEach(hit -> {
LOG.trace("Mapping {} id={}", clazz, hit.getId());
T x = loadEntity(clazz, Long.parseLong(hit.getId()));
if (x != null) {
LOG.trace("Adding to results: {}", x);
content.add(x);
} else {
LOG.trace("Got null");
}
});
return new PageImpl<>(content, page, searchResponse.getHits().getTotalHits());
}
@Override
public <T extends BasicModel> void process(Class<T> clazz, BasicModelFilter<?, ?> filter, IBatchAction<T> action, Long maxSize) throws Exception {
String indexName = toIndexName(clazz) + INDEX_READ;
TimeValue scrollTimeout = TimeValue.timeValueSeconds(30);
SearchRequestBuilder esQuery = client.prepareSearch(indexName).setTypes(COMMON_TYPE_NAME).setQuery(toEsQuery(clazz, filter))
.setScroll(scrollTimeout) // Initiate scroll
.setSize(1000); // batch size
esQuery.addSort(SortBuilders.fieldSort("id").order(SortOrder.ASC));
SearchResponse searchResponse = esQuery.get();
SearchHits hits = searchResponse.getHits();
// System.err.println("Got hits " + hits.getTotalHits());
AtomicLong counter = new AtomicLong();
while (hits.getHits().length > 0) {
LOG.debug("Processing {} hits ", hits.getHits().length);
List<T> content = new ArrayList<>();
searchResponse.getHits().forEach(hit -> {
if (maxSize != null && counter.get() > maxSize) {
// don't process anymore
return;
}
LOG.trace("Mapping {} id={}", clazz, hit.getId());
T x = loadEntity(clazz, Long.parseLong(hit.getId()));
if (x != null) {
counter.incrementAndGet();
LOG.trace("Adding to results: {}", x);
content.add(x);
} else {
LOG.trace("Got null");
}
});