Commit 8af25a0f authored by Matija Obreza
Browse files

Merge branch '632-filtered-similarity-search' into 'main'

Resolve "Filtered Similarity Search"

Closes #632

See merge request genesys-pgr/genesys-server!692
parents 86a87b27 51f7b8d5
......@@ -59,7 +59,7 @@
<snippetsDirectory>${project.build.directory}/generated-snippets</snippetsDirectory>
<junit.version>4.13.2</junit.version>
<application.blocks.version>3.2-SNAPSHOT</application.blocks.version>
<application.blocks.version>4.0-SNAPSHOT</application.blocks.version>
<commons.beanutils.version>1.9.4</commons.beanutils.version>
<commons.io.version>2.11.0</commons.io.version>
<commons.lang.version>3.12.0</commons.lang.version>
......
......@@ -15,17 +15,19 @@
*/
package org.genesys.catalog.model.filters;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.ListPath;
import java.io.Serializable;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.genesys.blocks.model.filters.StringFilter;
import org.genesys.catalog.model.dataset.DatasetAccessionRef;
import org.genesys.catalog.model.dataset.QDatasetAccessionRef;
import org.genesys2.server.service.filter.AccessionFilter;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.ListPath;
/**
* The Class AccessionRefFilter.
......@@ -33,7 +35,9 @@ import java.util.Set;
* @author Viacheslav Pavlov
* @author Matija Obreza
*/
public class AccessionRefFilter {
public class AccessionRefFilter implements Serializable {
private static final long serialVersionUID = 7898002699557658474L;
/** The doi. */
public Set<String> doi;
......
......@@ -24,9 +24,10 @@ import java.util.List;
import static org.genesys.catalog.model.vocab.QControlledVocabulary.controlledVocabulary;
public class ControlledVocabularyFilter extends UuidModelFilter<ControlledVocabularyFilter, ControlledVocabulary> {
private static final long serialVersionUID = 5953933222811688208L;
/** The owner. */
public PartnerFilter owner;
......
......@@ -38,6 +38,8 @@ import com.querydsl.core.types.Predicate;
*/
public class DatasetFilter extends UuidModelFilter<DatasetFilter, Dataset> implements IFullTextFilter {
private static final long serialVersionUID = -3014463705200950518L;
/** Any text. */
public String _text;
......
......@@ -14,6 +14,7 @@ import static org.genesys.catalog.model.dataset.QDatasetLocation.datasetLocation
public class DatasetLocationFilter extends UuidModelFilter<DatasetLocationFilter, DatasetLocation> {
private static final long serialVersionUID = -601069622681438809L;
/**
* The longitude.
......
......@@ -39,6 +39,8 @@ import static org.genesys.catalog.model.traits.QDescriptor.descriptor;
*/
public class DescriptorFilter extends UuidModelFilter<DescriptorFilter, Descriptor> implements IFullTextFilter {
private static final long serialVersionUID = -7996157718282752063L;
/** Any text. */
public String _text;
......
......@@ -38,6 +38,8 @@ import static org.genesys.catalog.model.traits.QDescriptorList.descriptorList;
*/
public class DescriptorListFilter extends UuidModelFilter<DescriptorListFilter, DescriptorList> implements IFullTextFilter {
private static final long serialVersionUID = -5524243000542552690L;
/** Any text. */
public String _text;
......
......@@ -39,6 +39,8 @@ import com.querydsl.core.types.Predicate;
*/
public class PartnerFilter extends UuidModelFilter<PartnerFilter, Partner> {
private static final long serialVersionUID = -6235354214065490627L;
/** The short name. */
public Set<String> shortName;
......
......@@ -34,7 +34,7 @@ public interface ShortFilterService {
* @return string with normalized filter
* @throws IOException Signals that an I/O exception has occurred.
*/
String normalizeFilter(SuperModelFilter<?,?> filter) throws IOException;
<T extends SuperModelFilter<T, ?>> String normalizeFilter(T filter) throws IOException;
/**
* Normalize the filter.
......@@ -43,7 +43,7 @@ public interface ShortFilterService {
* @param clazz the clazz
* @throws IOException Signals that an I/O exception has occurred.
*/
<T extends SuperModelFilter<?, ?>> T normalizeFilter(SuperModelFilter<?, ?> filter, Class<T> clazz) throws IOException;
<T extends SuperModelFilter<T, ?>> T normalizeFilter(T filter, Class<T> clazz) throws IOException;
/**
* Load short filter or create a new code.
......@@ -52,7 +52,7 @@ public interface ShortFilterService {
* @return found or created name of the shortened filter object
* @throws IOException
*/
String getCode(SuperModelFilter<?,?> filter) throws IOException;
<T extends SuperModelFilter<T, ?>> String getCode(T filter) throws IOException;
/**
* Load ShortFilter by short name.
......@@ -79,7 +79,7 @@ public interface ShortFilterService {
* @return the Instance of type T with data from JSON
* @throws IOException Signals that an I/O exception has occurred.
*/
<T extends SuperModelFilter<?,?>> T filterByCode(String code, Class<T> clazz) throws IOException;
<T extends SuperModelFilter<T,?>> T filterByCode(String code, Class<T> clazz) throws IOException;
public static class FilterInfo<T> {
public T filter;
......@@ -104,6 +104,6 @@ public interface ShortFilterService {
* @return
* @throws IOException
*/
<T extends SuperModelFilter<?,?>> T readFilter(String json, Class<T> clazz) throws IOException;
<T extends SuperModelFilter<T,?>> T readFilter(String json, Class<T> clazz) throws IOException;
}
......@@ -102,13 +102,13 @@ public class ShortFilterServiceImpl implements ShortFilterService, InitializingB
}
@Override
public <T extends SuperModelFilter<?, ?>> T readFilter(String json, Class<T> clazz) throws IOException {
public <T extends SuperModelFilter<T, ?>> T readFilter(String json, Class<T> clazz) throws IOException {
return BasicModelFilter.normalize(mapper.readValue(json, clazz));
}
@Override
@SuppressWarnings(value = "unchecked")
public String normalizeFilter(final SuperModelFilter<?, ?> filter) throws IOException {
public <T extends SuperModelFilter<T, ?>> String normalizeFilter(final T filter) throws IOException {
// Defaults
SuperModelFilter<?, ?> defaultFilter = null;
......@@ -146,7 +146,7 @@ public class ShortFilterServiceImpl implements ShortFilterService, InitializingB
}
@Override
public <T extends SuperModelFilter<?, ?>> T normalizeFilter(SuperModelFilter<?, ?> filter, Class<T> clazz) throws IOException {
public <T extends SuperModelFilter<T, ?>> T normalizeFilter(T filter, Class<T> clazz) throws IOException {
if (filter == null) {
try {
return clazz.getDeclaredConstructor().newInstance();
......@@ -176,7 +176,7 @@ public class ShortFilterServiceImpl implements ShortFilterService, InitializingB
@Override
@Transactional
public String getCode(final SuperModelFilter<?, ?> filter) throws IOException {
public <T extends SuperModelFilter<T, ?>> String getCode(final T filter) throws IOException {
final String normalizedFilter = normalizeFilter(filter);
ShortFilter shortFilter = loadByJSON(normalizedFilter);
......@@ -250,7 +250,7 @@ public class ShortFilterServiceImpl implements ShortFilterService, InitializingB
@Override
@Transactional(readOnly = true)
public <T extends SuperModelFilter<?, ?>> T filterByCode(String code, Class<T> clazz) throws IOException {
public <T extends SuperModelFilter<T, ?>> T filterByCode(String code, Class<T> clazz) throws IOException {
ShortFilter shortFilter = shortFilterRepository.findByCode(code == null ? "" : code);
if (shortFilter == null) {
......@@ -295,7 +295,7 @@ public class ShortFilterServiceImpl implements ShortFilterService, InitializingB
StringBuilder sb = new StringBuilder();
for (Object p : params) {
if (p instanceof BasicModelFilter) {
BasicModelFilter<?, ?> filter = (BasicModelFilter<?, ?>) p;
BasicModelFilter filter = (BasicModelFilter) p;
try {
sb.append(sfs.getCode(filter));
} catch (IOException e) {
......
......@@ -454,7 +454,15 @@ public class AccessionController {
@JsonView(JsonViews.Public.class)
public List<Hit<Accession>> getSimilarAccessionsForUUID(@PathVariable("uuid") final UUID uuid) {
Accession accession = accessionService.getByUuid(uuid);
return duplicateFinder.findSimilar(accession);
return duplicateFinder.findSimilar(accession, null);
}
/**
 * Finds accessions similar to the accession identified by the given UUID,
 * handing the filter from the request body to the duplicate finder.
 *
 * @param uuid UUID of the accession to look up and compare against
 * @param filter filter read from the request body; the body is not required, so this may be {@code null}
 * @return the hits returned by the duplicate finder
 */
@PreAuthorize("isAuthenticated()")
@PostMapping(value = "/similar/{uuid:\\w{8}\\-\\w{4}.+}", produces = MediaType.APPLICATION_JSON_VALUE)
@JsonView(JsonViews.Public.class)
public List<Hit<Accession>> getSimilarAccessionsForUUID(@PathVariable("uuid") final UUID uuid, @RequestBody(required = false) AccessionFilter filter) {
    // Resolve the accession first, then run the filtered similarity search
    final Accession target = accessionService.getByUuid(uuid);
    final List<Hit<Accession>> similar = duplicateFinder.findSimilar(target, filter);
    return similar;
}
@PreAuthorize("isAuthenticated()")
......@@ -765,28 +773,31 @@ public class AccessionController {
/**
* Returns accession json by filter
*
* @param limit - max count of accession returned
* @param filter - filter
* @param params - similarity search params {@link SimilaritySearchParams}
* @return json with minimal accession data
*/
@PostMapping(value = "/find-similar", produces = MediaType.APPLICATION_JSON_VALUE)
public List<SimilarityHit<Accession>> findSimilar(@RequestBody AccessionFilter filter) throws Exception {
public List<SimilarityHit<Accession>> findSimilar(@RequestBody SimilaritySearchParams params) throws Exception {
List<SimilarityHit<Accession>> results = new ArrayList<>();
final long countFiltered = accessionService.countAccessions(filter);
final long countFiltered = accessionService.countAccessions(params.select);
if (countFiltered > 100) {
throw new InvalidApiUsageException("Too many matches for similarity search!");
}
accessionProcessor.process(filter, (accessions) -> {
results.addAll(duplicateFinder.findSimilar(accessions));
accessionProcessor.process(params.select, (accessions) -> {
results.addAll(duplicateFinder.findSimilar(accessions, params.target));
return accessions;
});
return results;
}
/**
 * Request body of the accession {@code /find-similar} endpoint: one filter
 * selects the accessions to process, the other is handed to the duplicate
 * finder together with them.
 */
public static class SimilaritySearchParams {
/** Which accessions to process. */
public AccessionFilter select;
/** What target filter to apply. */
public AccessionFilter target;
}
/**
* Get term overview for filters
......
......@@ -351,10 +351,16 @@ public class DescriptorController extends ApiBaseController {
}
@PostMapping(value = "/find-similar", produces = MediaType.APPLICATION_JSON_VALUE)
public List<Hit<Descriptor>> findSimilar(@RequestBody Descriptor target) throws Exception {
public List<Hit<Descriptor>> findSimilar(@RequestBody SimilarRequest similarRequest) throws Exception {
Descriptor target = similarRequest.select;
if (target.getUuid() != null) {
target = descriptorService.loadDescriptor(target.getUuid());
}
return duplicateFinder.findSimilar(target);
return duplicateFinder.findSimilar(target, similarRequest.target);
}
/**
 * Request body of the descriptor {@code /find-similar} endpoint.
 */
public static class SimilarRequest {
/** The descriptor to find similar descriptors for; reloaded by UUID when a UUID is provided. */
public Descriptor select;
/** Filter passed to the duplicate finder along with the selected descriptor. */
public DescriptorFilter target;
}
}
......@@ -54,7 +54,6 @@ import org.springframework.data.jpa.repository.support.Querydsl;
import org.springframework.stereotype.Repository;
import org.springframework.transaction.annotation.Transactional;
import com.querydsl.core.types.dsl.BooleanExpression;
import com.querydsl.core.types.dsl.PathBuilder;
import com.querydsl.core.types.dsl.PathBuilderFactory;
import com.querydsl.jpa.JPAExpressions;
......
......@@ -47,6 +47,7 @@ import org.genesys2.server.service.PGRFANetworkService;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.deser.std.UUIDDeserializer;
import com.hazelcast.internal.util.CollectionUtil;
import com.querydsl.core.BooleanBuilder;
import com.querydsl.core.types.ExpressionUtils;
import com.querydsl.core.types.Predicate;
import com.querydsl.jpa.JPQLQuery;
......@@ -337,7 +338,23 @@ public class AccessionFilter extends UuidModelFilter<AccessionFilter, Accession>
}
}
query.where(ExpressionUtils.allOf(predicates));
var builder = new BooleanBuilder(ExpressionUtils.allOf(predicates));
if (NOT != null) {
// This is not a regular NOT operation where not(A & B) = not(A) or not(B)
// This is not(A, B) = not(A) and not(B)
builder.and(ExpressionUtils.anyOf(NOT.collectPredicates()).not());
}
if (AND != null) {
builder.and(AND.buildPredicate());
}
if (OR != null) {
builder.or(OR.buildPredicate());
}
query.where(builder);
return query;
}
......
......@@ -38,7 +38,7 @@ public class ShortFilterProcessor {
@Autowired
protected ShortFilterService shortFilterService;
public <T extends SuperModelFilter<?, ?>> FilterInfo<T> processFilter(final String filterCode, final T filter, Class<T> clazz) throws IOException {
public <T extends SuperModelFilter<T, ?>> FilterInfo<T> processFilter(final String filterCode, final T filter, Class<T> clazz) throws IOException {
Throwable lastException = null;
for (int i = 5; i >=0; i--) {
......@@ -55,7 +55,7 @@ public class ShortFilterProcessor {
throw new IOException("Failed to process filter: " + lastException.getMessage(), lastException);
}
private <T extends SuperModelFilter<?, ?>> FilterInfo<T> doProcessFilter(final String filterCode, final T filter, Class<T> clazz) throws IOException {
private <T extends SuperModelFilter<T, ?>> FilterInfo<T> doProcessFilter(final String filterCode, final T filter, Class<T> clazz) throws IOException {
FilterInfo<T> processedFilter = new FilterInfo<>();
if (filterCode != null) {
......@@ -82,11 +82,11 @@ public class ShortFilterProcessor {
return processedFilter;
}
public <T extends SuperModelFilter<?,?>> T filterByCode(String filterCode, Class<T> clazz) throws IOException {
public <T extends SuperModelFilter<T,?>> T filterByCode(String filterCode, Class<T> clazz) throws IOException {
return shortFilterService.filterByCode(filterCode, clazz);
}
public <T extends SuperModelFilter<?, ?>> T normalizeFilter(T filter, Class<T> filterType) throws IOException {
public <T extends SuperModelFilter<T, ?>> T normalizeFilter(T filter, Class<T> filterType) throws IOException {
return shortFilterService.normalizeFilter(filter, filterType);
}
}
......@@ -42,7 +42,7 @@ import org.springframework.stereotype.Component;
* Accession Duplicate Finder.
*/
@Component
public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
public class AccessionDuplicateFinder extends DuplicateFinder<Accession, AccessionFilter> {
@Autowired
private AccessionService accessionService;
......@@ -53,7 +53,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
}
@Override
protected List<Accession> getCandidates(Accession target, Collection<Long> excludedById) {
protected List<Accession> getCandidates(Accession target, Collection<Long> excludedById, AccessionFilter additionalFilter) {
assert (target != null);
LOG.info("Searching for duplicates of {}", target.toString());
......@@ -75,13 +75,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
if (StringUtils.isNotBlank(aDonorNumb)) {
var aDonorNumbSplit = spaceStringsAndNumbers(aDonorNumb);
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
// if (StringUtils.isNotBlank(accession.getDonorCode())) {
// filter.institute().code = Set.of(accession.getDonorCode());
......@@ -118,14 +112,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
// By genus and accession name
if (StringUtils.isNotBlank(target.getAccessionName())) {
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
filter.taxa().genus = genusSet;
// filter.taxa().species = Set.of(accession.getTaxonomy().getSpecies());
......@@ -154,14 +141,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
{
// By genus and other IDs
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
filter.taxa().genus = genusSet;
......@@ -202,14 +182,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
// By genus and coordinates
if (target.getAccessionId().getLatitude() != null && target.getAccessionId().getLongitude() != null) {
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
filter.taxa().genus = Set.of(taxonomy.getGenus());
......@@ -231,6 +204,20 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
return candidates;
}
/**
 * Creates the starting filter for a single candidate lookup.
 *
 * The filter's {@code NOT} part lists, by ID, the target itself, the
 * explicitly excluded entries and the candidates found so far. When an
 * additional filter is given it is attached as the {@code AND} part.
 *
 * @param target the accession for which candidates are being searched
 * @param excludedById IDs to exclude; may be {@code null} or empty
 * @param candidates candidates already found
 * @param additionalFilter extra filter to attach; may be {@code null}
 * @return a new filter ready for further, lookup-specific criteria
 */
private AccessionFilter getCandidatesFilter(Accession target, Collection<Long> excludedById, List<Accession> candidates, AccessionFilter additionalFilter) {
    final AccessionFilter candidatesFilter = new AccessionFilter(null);
    final AccessionFilter exclusions = new AccessionFilter(null);
    candidatesFilter.NOT = exclusions;

    // Never return the target itself
    exclusions.id().add(target.getId());

    // Skip whatever the caller asked to leave out
    if (! CollectionUtils.isEmpty(excludedById)) {
        exclusions.id().addAll(excludedById);
    }

    // Skip candidates picked up by earlier lookups
    final Set<Long> alreadyFound = candidates.stream().map(Accession::getId).collect(Collectors.toSet());
    exclusions.id().addAll(alreadyFound);

    if (additionalFilter != null) {
        candidatesFilter.AND = additionalFilter;
    }
    return candidatesFilter;
}
/**
* Score the match against accession. Scoring should be transitive.
*
......
......@@ -40,7 +40,7 @@ import com.hazelcast.internal.util.CollectionUtil;
* @author Matija Obreza
*/
@Component
public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor> {
public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor, DescriptorFilter> {
@Autowired
private DescriptorService descriptorService;
......@@ -51,7 +51,7 @@ public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor> {
}
@Override
protected List<Descriptor> getCandidates(Descriptor target, Collection<Long> excludedById) {
protected List<Descriptor> getCandidates(Descriptor target, Collection<Long> excludedById, DescriptorFilter additionalFilter) {
assert (target != null);
LOG.info("Searching for duplicates of {}", target.toString());
......@@ -60,6 +60,9 @@ public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor> {
if (StringUtils.isNotBlank(target.getTitle())) {
try {
DescriptorFilter filter = new DescriptorFilter();
if (additionalFilter != null) {
filter.AND = additionalFilter;
}
filter.state(PublishState.PUBLISHED);
if (! CollectionUtils.isEmpty(excludedById)) {
......
......@@ -30,7 +30,8 @@ import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.EntityId;
import org.genesys.blocks.model.EmptyModel;
import org.genesys.blocks.model.filters.EmptyModelFilter;
import org.genesys.taxonomy.checker.StringSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -45,7 +46,7 @@ import com.google.common.cache.CacheBuilder;
/**
* Duplicate Finder base.
*/
public abstract class DuplicateFinder<T extends EntityId> {
public abstract class DuplicateFinder<T extends EmptyModel, F extends EmptyModelFilter<F, T>> {
protected final Logger LOG = LoggerFactory.getLogger(getClass());
......@@ -90,11 +91,11 @@ public abstract class DuplicateFinder<T extends EntityId> {
@Transactional(readOnly = true)
@PreAuthorize("hasRole('ADMINISTRATOR')")
public final List<SimilarityHit<T>> findSimilar(List<T> targets) {
public final List<SimilarityHit<T>> findSimilar(List<T> targets, F filter) {
assert (targets != null);
LOG.warn("Finding duplicates for {} targets", targets.size());
return targets.stream().map((accession) -> new SimilarityHit<T>(accession, findSimilar(accession))).collect(Collectors.toList());
return targets.stream().map((accession) -> new SimilarityHit<T>(accession, findSimilar(accession, filter))).collect(Collectors.toList());
}
/**
......@@ -104,8 +105,8 @@ public abstract class DuplicateFinder<T extends EntityId> {
* @return the list
*/
@Transactional(readOnly = true)
public final List<Hit<T>> findSimilar(T target) {
return findSimilar(target, CollectionUtils.emptyCollection());
public final List<Hit<T>> findSimilar(T target, F filter) {
return findSimilar(target, CollectionUtils.emptyCollection(), filter);
}
/**
......@@ -116,12 +117,12 @@ public abstract class DuplicateFinder<T extends EntityId> {
* @return the list
*/
@Transactional(readOnly = true)
public List<Hit<T>> findSimilar(T target, Collection<Long> excludedById) {
public List<Hit<T>> findSimilar(T target, Collection<Long> excludedById, F filter) {
assert (target != null);
LOG.info("Searching for duplicates of {}", target.toString());
List<T> candidates = getCandidates(target, excludedById);
List<T> candidates = getCandidates(target, excludedById, filter);
if (target.getId() != null) {
candidates.removeIf(candidate -> candidate.getId().equals(target.getId()));
......@@ -178,7 +179,7 @@ public abstract class DuplicateFinder<T extends EntityId> {
* @param excludedById the IDs of excluded entities
* @return list of candidates
*/
protected abstract List<T> getCandidates(final T target, final Collection<Long> excludedById);
protected abstract List<T> getCandidates(final T target, final Collection<Long> excludedById, F filter);
/**
* Convert ES query to a safe ES query by replacing non digit, non word chars
......
......@@ -59,10 +59,15 @@ public abstract class AbstractServiceTest extends AbstractTest {
@Autowired
public TaxonomyService taxonomyService;
protected FaoInstitute setupInstitute(final String code){
final FaoInstitute inputI = new FaoInstitute();
inputI.setCode(code);
return instituteRepository.save(inputI);
protected FaoInstitute setupInstitute(final String instCode) {
var institute = instituteService.getInstitute(instCode);
if (institute == null) {
final FaoInstitute inputI = new FaoInstitute();
inputI.setCode(instCode);
return instituteRepository.save(inputI);
} else {
return institute;
}
}
protected Accession upsertAccession(final String instCode, final String acceNumb, final String genus) {
......
......@@ -29,6 +29,7 @@ import org.genesys.catalog.service.PartnerService;