Commit 51f7b8d5 authored by Artem Hrybeniuk, committed by Matija Obreza
Browse files

Filtered Similarity Search

parent 55857a54
......@@ -454,7 +454,15 @@ public class AccessionController {
@JsonView(JsonViews.Public.class)
public List<Hit<Accession>> getSimilarAccessionsForUUID(@PathVariable("uuid") final UUID uuid) {
Accession accession = accessionService.getByUuid(uuid);
return duplicateFinder.findSimilar(accession);
return duplicateFinder.findSimilar(accession, null);
}
/**
 * Finds accessions similar to the accession identified by {@code uuid},
 * optionally restricting the search with a filter sent in the request body.
 *
 * @param uuid   UUID of the accession to find similar records for
 * @param filter optional filter from the request body (the body is not required);
 *               handed to the duplicate finder as-is
 * @return the hits returned by the duplicate finder
 */
@PreAuthorize("isAuthenticated()")
@PostMapping(value = "/similar/{uuid:\\w{8}\\-\\w{4}.+}", produces = MediaType.APPLICATION_JSON_VALUE)
@JsonView(JsonViews.Public.class)
public List<Hit<Accession>> getSimilarAccessionsForUUID(@PathVariable("uuid") final UUID uuid, @RequestBody(required = false) AccessionFilter filter) {
    // Resolve the source accession first, then delegate the filtered search.
    final Accession source = accessionService.getByUuid(uuid);
    final List<Hit<Accession>> hits = duplicateFinder.findSimilar(source, filter);
    return hits;
}
@PreAuthorize("isAuthenticated()")
......@@ -765,28 +773,31 @@ public class AccessionController {
/**
* Returns accession json by filter
*
* @param limit - max count of accession returned
* @param filter - filter
* @param params - similarity search params {@link SimilaritySearchParams}
* @return json with minimal accession data
*/
@PostMapping(value = "/find-similar", produces = MediaType.APPLICATION_JSON_VALUE)
public List<SimilarityHit<Accession>> findSimilar(@RequestBody AccessionFilter filter) throws Exception {
public List<SimilarityHit<Accession>> findSimilar(@RequestBody SimilaritySearchParams params) throws Exception {
List<SimilarityHit<Accession>> results = new ArrayList<>();
final long countFiltered = accessionService.countAccessions(filter);
final long countFiltered = accessionService.countAccessions(params.select);
if (countFiltered > 100) {
throw new InvalidApiUsageException("Too many matches for similarity search!");
}
accessionProcessor.process(filter, (accessions) -> {
results.addAll(duplicateFinder.findSimilar(accessions));
accessionProcessor.process(params.select, (accessions) -> {
results.addAll(duplicateFinder.findSimilar(accessions, params.target));
return accessions;
});
return results;
}
/**
 * Request body of the accession {@code /find-similar} endpoint.
 * Fields are public and are read directly by the controller.
 */
public static class SimilaritySearchParams {
// Selects the accessions to process: used both for the count check and for
// the accessions handed to the duplicate finder.
public AccessionFilter select; // Which accessions to process
// Passed to the duplicate finder as the filter for the similarity search.
public AccessionFilter target; // What target filter to apply
}
/**
* Get term overview for filters
......
......@@ -351,10 +351,16 @@ public class DescriptorController extends ApiBaseController {
}
@PostMapping(value = "/find-similar", produces = MediaType.APPLICATION_JSON_VALUE)
public List<Hit<Descriptor>> findSimilar(@RequestBody Descriptor target) throws Exception {
public List<Hit<Descriptor>> findSimilar(@RequestBody SimilarRequest similarRequest) throws Exception {
Descriptor target = similarRequest.select;
if (target.getUuid() != null) {
target = descriptorService.loadDescriptor(target.getUuid());
}
return duplicateFinder.findSimilar(target);
return duplicateFinder.findSimilar(target, similarRequest.target);
}
/**
 * Request body of the descriptor {@code /find-similar} endpoint.
 * Fields are public and are read directly by the controller.
 */
public static class SimilarRequest {
// Descriptor to find similar records for; when it carries a UUID the
// controller reloads it through the descriptor service before searching.
public Descriptor select;
// Passed to the duplicate finder as the filter for the similarity search.
public DescriptorFilter target;
}
}
......@@ -42,7 +42,7 @@ import org.springframework.stereotype.Component;
* Accession Duplicate Finder.
*/
@Component
public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
public class AccessionDuplicateFinder extends DuplicateFinder<Accession, AccessionFilter> {
@Autowired
private AccessionService accessionService;
......@@ -53,7 +53,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
}
@Override
protected List<Accession> getCandidates(Accession target, Collection<Long> excludedById) {
protected List<Accession> getCandidates(Accession target, Collection<Long> excludedById, AccessionFilter additionalFilter) {
assert (target != null);
LOG.info("Searching for duplicates of {}", target.toString());
......@@ -75,13 +75,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
if (StringUtils.isNotBlank(aDonorNumb)) {
var aDonorNumbSplit = spaceStringsAndNumbers(aDonorNumb);
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
// if (StringUtils.isNotBlank(accession.getDonorCode())) {
// filter.institute().code = Set.of(accession.getDonorCode());
......@@ -118,14 +112,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
// By genus and accession name
if (StringUtils.isNotBlank(target.getAccessionName())) {
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
filter.taxa().genus = genusSet;
// filter.taxa().species = Set.of(accession.getTaxonomy().getSpecies());
......@@ -154,14 +141,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
{
// By genus and other IDs
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
filter.taxa().genus = genusSet;
......@@ -202,14 +182,7 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
// By genus and coordinates
if (target.getAccessionId().getLatitude() != null && target.getAccessionId().getLongitude() != null) {
AccessionFilter filter = new AccessionFilter(null);
filter.NOT = new AccessionFilter(null);
filter.NOT.id().add(target.getId()); // Not this
if (! CollectionUtils.isEmpty(excludedById)) {
filter.NOT.id().addAll(excludedById);
}
filter.NOT.id().addAll(candidates.stream().map(Accession::getId).collect(Collectors.toSet())); // Not already found
AccessionFilter filter = getCandidatesFilter(target, excludedById, candidates, additionalFilter);
filter.taxa().genus = Set.of(taxonomy.getGenus());
......@@ -231,6 +204,20 @@ public class AccessionDuplicateFinder extends DuplicateFinder<Accession> {
return candidates;
}
/**
 * Builds the base filter shared by every candidate lookup: it excludes the
 * target itself, the explicitly excluded IDs and the candidates found so far,
 * and attaches the caller-supplied filter (if any) as an AND condition.
 *
 * @param target           accession for which duplicates are searched
 * @param excludedById     IDs to leave out; may be {@code null} or empty
 * @param candidates       candidates already collected; their IDs are excluded
 * @param additionalFilter optional extra filter; ignored when {@code null}
 * @return a new filter with the exclusions and the optional AND filter set
 */
private AccessionFilter getCandidatesFilter(Accession target, Collection<Long> excludedById, List<Accession> candidates, AccessionFilter additionalFilter) {
    final AccessionFilter base = new AccessionFilter(null);
    base.NOT = new AccessionFilter(null);

    // Not this
    base.NOT.id().add(target.getId());

    // Not the explicitly excluded ones
    final boolean hasExclusions = !CollectionUtils.isEmpty(excludedById);
    if (hasExclusions) {
        base.NOT.id().addAll(excludedById);
    }

    // Not already found
    for (final Accession found : candidates) {
        base.NOT.id().add(found.getId());
    }

    if (additionalFilter == null) {
        return base;
    }
    base.AND = additionalFilter;
    return base;
}
/**
* Score the match against accession. Scoring should be transitive.
*
......
......@@ -40,7 +40,7 @@ import com.hazelcast.internal.util.CollectionUtil;
* @author Matija Obreza
*/
@Component
public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor> {
public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor, DescriptorFilter> {
@Autowired
private DescriptorService descriptorService;
......@@ -51,7 +51,7 @@ public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor> {
}
@Override
protected List<Descriptor> getCandidates(Descriptor target, Collection<Long> excludedById) {
protected List<Descriptor> getCandidates(Descriptor target, Collection<Long> excludedById, DescriptorFilter additionalFilter) {
assert (target != null);
LOG.info("Searching for duplicates of {}", target.toString());
......@@ -60,6 +60,9 @@ public class DescriptorDuplicateFinder extends DuplicateFinder<Descriptor> {
if (StringUtils.isNotBlank(target.getTitle())) {
try {
DescriptorFilter filter = new DescriptorFilter();
if (additionalFilter != null) {
filter.AND = additionalFilter;
}
filter.state(PublishState.PUBLISHED);
if (! CollectionUtils.isEmpty(excludedById)) {
......
......@@ -30,7 +30,8 @@ import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.blocks.model.EntityId;
import org.genesys.blocks.model.EmptyModel;
import org.genesys.blocks.model.filters.EmptyModelFilter;
import org.genesys.taxonomy.checker.StringSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -45,7 +46,7 @@ import com.google.common.cache.CacheBuilder;
/**
* Duplicate Finder base.
*/
public abstract class DuplicateFinder<T extends EntityId> {
public abstract class DuplicateFinder<T extends EmptyModel, F extends EmptyModelFilter<F, T>> {
protected final Logger LOG = LoggerFactory.getLogger(getClass());
......@@ -90,11 +91,11 @@ public abstract class DuplicateFinder<T extends EntityId> {
@Transactional(readOnly = true)
@PreAuthorize("hasRole('ADMINISTRATOR')")
public final List<SimilarityHit<T>> findSimilar(List<T> targets) {
public final List<SimilarityHit<T>> findSimilar(List<T> targets, F filter) {
assert (targets != null);
LOG.warn("Finding duplicates for {} targets", targets.size());
return targets.stream().map((accession) -> new SimilarityHit<T>(accession, findSimilar(accession))).collect(Collectors.toList());
return targets.stream().map((accession) -> new SimilarityHit<T>(accession, findSimilar(accession, filter))).collect(Collectors.toList());
}
/**
......@@ -104,8 +105,8 @@ public abstract class DuplicateFinder<T extends EntityId> {
* @return the list
*/
@Transactional(readOnly = true)
public final List<Hit<T>> findSimilar(T target) {
return findSimilar(target, CollectionUtils.emptyCollection());
public final List<Hit<T>> findSimilar(T target, F filter) {
return findSimilar(target, CollectionUtils.emptyCollection(), filter);
}
/**
......@@ -116,12 +117,12 @@ public abstract class DuplicateFinder<T extends EntityId> {
* @return the list
*/
@Transactional(readOnly = true)
public List<Hit<T>> findSimilar(T target, Collection<Long> excludedById) {
public List<Hit<T>> findSimilar(T target, Collection<Long> excludedById, F filter) {
assert (target != null);
LOG.info("Searching for duplicates of {}", target.toString());
List<T> candidates = getCandidates(target, excludedById);
List<T> candidates = getCandidates(target, excludedById, filter);
if (target.getId() != null) {
candidates.removeIf(candidate -> candidate.getId().equals(target.getId()));
......@@ -178,7 +179,7 @@ public abstract class DuplicateFinder<T extends EntityId> {
* @param excludedById the IDs of excluded entities
* @return list of candidates
*/
protected abstract List<T> getCandidates(final T target, final Collection<Long> excludedById);
protected abstract List<T> getCandidates(final T target, final Collection<Long> excludedById, F filter);
/**
* Convert ES query to a safe ES query by replacing non digit, non word chars
......
......@@ -29,6 +29,7 @@ import org.genesys.catalog.service.PartnerService;
import org.genesys.filerepository.persistence.ImageGalleryPersistence;
import org.genesys.filerepository.service.RepositoryService;
import org.genesys.test.base.AbstractApiTest;
import org.genesys.test.server.services.AbstractElasticServicesTest;
import org.genesys2.server.model.genesys.Accession;
import org.genesys2.server.model.genesys.AccessionId;
import org.genesys2.server.model.genesys.AccessionRemark;
......@@ -51,11 +52,14 @@ import org.genesys2.server.service.worker.AccessionUploader;
import org.junit.After;
import org.junit.Before;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.ContextHierarchy;
import org.springframework.transaction.annotation.Transactional;
/**
* @author Maxym Borodenko
*/
@ContextHierarchy(@ContextConfiguration(name = "api", classes = { AbstractElasticServicesTest.Config.class }))
public abstract class AbstractAccessionControllerTest extends AbstractApiTest {
protected static final int STORAGE_10 = 10;
......
......@@ -2130,6 +2130,64 @@ public class AccessionControllerTest extends AbstractAccessionControllerTest {
assertThat(csvReader.readAll().size(), is(1));
}
/**
 * POST {@code /similar/{uuid}} should return similar accessions and honour the
 * optional {@code AccessionFilter} sent in the request body: with no body two
 * hits are expected, and with {@code aegis} set to {@code false} or
 * {@code true} exactly one hit with the matching {@code aegis} value.
 */
@Test
public void getSimilarAccessionsTest() throws Exception {
assertThat(accessionRepository.count(), is(0L));
// Three accessions; the first argument is presumably the aegis flag (true / false / null).
Accession withAegis = addAccessionInDB(true, null);
Accession withoutAegis = addAccessionInDB(false, null);
Accession nullAegis = addAccessionInDB(null, null);
withAegis = accessionRepository.save(withAegis);
withoutAegis = accessionRepository.save(withoutAegis);
nullAegis = accessionRepository.save(nullAegis);
assertThat(accessionRepository.count(), is(3L));
// No request body: the search is unfiltered and two hits are expected.
/*@formatter:off*/
mockMvc
.perform(post(AccessionController.CONTROLLER_URL + "/similar/{uuid}", nullAegis.getUuid()))
// .andDo(org.springframework.test.web.servlet.result.MockMvcResultHandlers.print())
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_VALUE))
.andExpect(jsonPath("$", hasSize(2)))
;
/*@formatter:on*/
// Filter with aegis = false: exactly one hit, with aegis false.
AccessionFilter accessionFilter = new AccessionFilter();
accessionFilter.aegis = false;
/*@formatter:off*/
mockMvc
.perform(post(AccessionController.CONTROLLER_URL + "/similar/{uuid}", nullAegis.getUuid())
.contentType(MediaType.APPLICATION_JSON)
.content(objectMapper.writeValueAsString(accessionFilter)))
// .andDo(org.springframework.test.web.servlet.result.MockMvcResultHandlers.print())
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_VALUE))
.andExpect(jsonPath("$", hasSize(1)))
.andExpect(jsonPath("$[0].aegis", is(false)))
;
/*@formatter:on*/
// Filter with aegis = true: exactly one hit, with aegis true.
accessionFilter = new AccessionFilter();
accessionFilter.aegis = true;
/*@formatter:off*/
mockMvc
.perform(post(AccessionController.CONTROLLER_URL + "/similar/{uuid}", nullAegis.getUuid())
.contentType(MediaType.APPLICATION_JSON)
.content(objectMapper.writeValueAsString(accessionFilter)))
// .andDo(org.springframework.test.web.servlet.result.MockMvcResultHandlers.print())
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_VALUE))
.andExpect(jsonPath("$", hasSize(1)))
.andExpect(jsonPath("$[0].aegis", is(true)))
;
/*@formatter:on*/
}
private JsonNode accessionGeo(Double latitude, Double longitude, Double elevation) {
ObjectNode geo = objectMapper.createObjectNode();
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment