Commit 720482ee authored by Matija Obreza's avatar Matija Obreza
Browse files

Convert AllAccenames to AccessionAlias and clean up

parent c61012b9
......@@ -25,12 +25,16 @@ import javax.persistence.Table;
import javax.persistence.Version;
import org.genesys2.server.model.BusinessModel;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Store;
/**
* Accession "alias"
*/
@Entity
@Table(name = "accessionalias")
@Indexed
public class AccessionAlias extends BusinessModel {
public static enum AliasType {
......@@ -69,6 +73,7 @@ public class AccessionAlias extends BusinessModel {
private Accession accession;
@Column(name = "name", length = 150)
@Field(name = "title", store = Store.NO)
private String name;
@Column(length = 7)
......
......@@ -26,4 +26,6 @@ public interface AccessionAliasRepository extends JpaRepository<AccessionAlias,
/**
 * NOTE(review): presumably returns every alias recorded for the given accession; the query
 * is not visible here — confirm against the repository implementation.
 */
List<AccessionAlias> findByAccession(Accession accession);
/**
 * Looks up aliases by accession and alias type.
 *
 * @param accession the accession whose aliases are wanted
 * @param aliasType numeric alias type id; the service layer passes {@code AliasType.getId()}
 */
List<AccessionAlias> findByAccessionAndAliasType(Accession accession, int aliasType);
}
......@@ -47,4 +47,8 @@ public interface GenesysLowlevelRepository {
/**
 * NOTE(review): implementation not visible here — presumably feeds the accessions related to
 * the metadata record {@code id} to the handler; confirm.
 */
void listMetadataAccessions(long id, RowCallbackHandler rowCallbackHandler);
/**
 * NOTE(review): implementation not fully visible here — presumably feeds alias rows selected
 * by {@code code} to the handler; confirm what {@code code} identifies.
 */
void listAccessionsAlias(String code, RowCallbackHandler rowCallbackHandler);
/**
 * Feeds every row of the {@code accessionname} table to the handler. Columns, in order:
 * accessionId, names, otherIds.
 */
void listAccessionsAccenames(RowCallbackHandler rowCallbackHandler);
/**
 * Feeds every row of the {@code accessionalias} table to the handler, ordered by accession id.
 * Columns, in order: accessionId, instCode, name, aliasType, lang, version, id.
 */
void listAccessionsAlias(RowCallbackHandler rowCallbackHandler);
}
......@@ -248,6 +248,21 @@ public class GenesysLowlevelRepositoryImpl implements GenesysLowlevelRepository
}, rowCallbackHandler);
}
/**
 * Feeds every row of the {@code accessionalias} table to the given handler, ordered by
 * accession id so that all aliases of one accession arrive consecutively.
 * <p>
 * Columns, in order: accessionId, instCode, name, aliasType, lang, version, id.
 *
 * @param rowCallbackHandler callback invoked once per result row
 */
@Override
public void listAccessionsAlias(RowCallbackHandler rowCallbackHandler) {
	jdbcTemplate.query(new PreparedStatementCreator() {
		@Override
		public PreparedStatement createPreparedStatement(Connection con) throws SQLException {
			PreparedStatement stmt = con
					.prepareStatement("select n.accessionId, n.instCode, n.name, n.aliasType, n.lang, n.version, n.id from accessionalias n order by n.accessionId");
			// Set mysql JConnector to stream results
			stmt.setFetchSize(Integer.MIN_VALUE);
			return stmt;
		}
	}, rowCallbackHandler);
}
@Override
public void listAccessionsColl(final String instCode, RowCallbackHandler rowCallbackHandler) {
jdbcTemplate.query(new PreparedStatementCreator() {
......@@ -277,4 +292,18 @@ public class GenesysLowlevelRepositoryImpl implements GenesysLowlevelRepository
}
}, rowCallbackHandler);
}
/**
 * Feeds every row of the legacy {@code accessionname} table to the given handler.
 * <p>
 * Columns, in order: accessionId, names, otherIds.
 *
 * @param rowCallbackHandler callback invoked once per result row
 */
@Override
public void listAccessionsAccenames(RowCallbackHandler rowCallbackHandler) {
	final String sql = "select accessionId, names, otherIds from accessionname";
	final PreparedStatementCreator streamingStatement = new PreparedStatementCreator() {
		@Override
		public PreparedStatement createPreparedStatement(Connection connection) throws SQLException {
			final PreparedStatement statement = connection.prepareStatement(sql);
			// Integer.MIN_VALUE asks the MySQL driver to stream the result set
			statement.setFetchSize(Integer.MIN_VALUE);
			return statement;
		}
	};
	jdbcTemplate.query(streamingStatement, rowCallbackHandler);
}
}
......@@ -149,4 +149,8 @@ public interface GenesysService {
/**
 * Deletes the given alias entities.
 */
void removeAliases(List<AccessionAlias> aliases);
/**
 * Synchronises the aliases of one accession with the supplied legacy values.
 *
 * @param accessionId id of the accession to update; nothing happens when it does not exist
 * @param acceNames semicolon-separated accession names, may be blank
 * @param otherIds semicolon-separated other identifiers, each either {@code name} or
 *            {@code instCode:name}, may be blank
 */
void upsertAliases(long accessionId, String acceNames, String otherIds);
/**
 * Deletes the aliases with the given ids, one by one.
 */
void removeAliases(Set<Long> toRemove);
}
......@@ -31,6 +31,8 @@ import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.Predicate;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
......@@ -1017,4 +1019,103 @@ public class GenesysServiceImpl implements GenesysService, TraitService, Dataset
public void removeAliases(List<AccessionAlias> aliases) {
// Hand the whole list to the repository for deletion
accessionAliasRepository.delete(aliases);
}
/**
 * Synchronises the stored aliases of an accession with the supplied legacy name strings.
 * <p>
 * {@code acceNames} is split on {@code ';'} into {@link AliasType#ACCENAME} aliases.
 * {@code otherIds} is split on {@code ';'} into {@link AliasType#OTHERNUMB} aliases, where each
 * entry is either {@code name} or {@code instCode:name}. Blank entries, entries with a blank
 * name after the colon, and entries whose institute code is longer than 7 characters are
 * skipped. Each alias type is then reconciled separately against the stored aliases.
 *
 * @param accessionId id of the accession; an error is logged and nothing is changed when no
 *            such accession exists
 * @param acceNames semicolon-separated accession names, may be blank
 * @param otherIds semicolon-separated other identifiers, may be blank
 */
@Override
@Transactional
@PreAuthorize("hasRole('ADMINISTRATOR')")
public void upsertAliases(long accessionId, String acceNames, String otherIds) {
	final Accession accession = getAccession(accessionId);
	if (accession == null) {
		LOG.error("No such accession " + accessionId);
		return;
	}

	// Accession names
	final List<AccessionAlias> nameAliases = new ArrayList<AccessionAlias>();
	if (StringUtils.isNotBlank(acceNames)) {
		for (final String rawName : acceNames.split(";")) {
			if (StringUtils.isNotBlank(rawName)) {
				final AccessionAlias nameAlias = new AccessionAlias();
				nameAlias.setAccession(accession);
				nameAlias.setName(rawName.trim());
				nameAlias.setAliasType(AliasType.ACCENAME);
				nameAliases.add(nameAlias);
			}
		}
	}
	ensureAliases(accession, nameAliases, AliasType.ACCENAME);

	// Other identifiers, optionally prefixed with "instCode:"
	final List<AccessionAlias> otherAliases = new ArrayList<AccessionAlias>();
	if (StringUtils.isNotBlank(otherIds)) {
		for (final String entry : otherIds.split(";")) {
			if (StringUtils.isBlank(entry)) {
				continue;
			}
			// Limit 2: only the first colon separates institute code from the name
			final String[] parts = entry.split(":", 2);
			final AccessionAlias otherAlias = new AccessionAlias();
			otherAlias.setAccession(accession);
			if (parts.length == 1) {
				if (StringUtils.isBlank(parts[0])) {
					continue;
				}
				otherAlias.setName(parts[0].trim());
			} else {
				if (StringUtils.isBlank(parts[1])) {
					continue;
				}
				otherAlias.setInstCode(StringUtils.defaultIfBlank(parts[0].trim(), null));
				final boolean instCodeTooLong = otherAlias.getInstCode() != null && otherAlias.getInstCode().length() > 7;
				if (instCodeTooLong) {
					LOG.warn("Invalid instCode: " + otherAlias.getInstCode() + " in=" + entry);
					continue;
				}
				otherAlias.setName(parts[1].trim());
			}
			otherAlias.setAliasType(AliasType.OTHERNUMB);
			otherAliases.add(otherAlias);
		}
	}
	ensureAliases(accession, otherAliases, AliasType.OTHERNUMB);
}
/**
 * Reconciles the stored aliases of one type with the wanted list, matching by alias name only.
 * <p>
 * Stored aliases whose name is not in {@code aliases} are deleted; wanted aliases whose name
 * is not stored yet are saved. Stored aliases with a matching name are left untouched.
 *
 * @param accession the accession owning the aliases
 * @param aliases the aliases that should exist for this type
 * @param aliasType the alias type being reconciled
 */
private void ensureAliases(Accession accession, List<AccessionAlias> aliases, AliasType aliasType) {
	final List<AccessionAlias> stored = accessionAliasRepository.findByAccessionAndAliasType(accession, aliasType.getId());

	// Delete stored aliases that are no longer wanted
	for (final AccessionAlias storedAlias : stored) {
		final AccessionAlias stillWanted = CollectionUtils.find(aliases, sameNameAs(storedAlias));
		if (stillWanted == null) {
			accessionAliasRepository.delete(storedAlias);
		}
	}

	// Save wanted aliases that are not stored yet
	for (final AccessionAlias wantedAlias : aliases) {
		final AccessionAlias alreadyStored = CollectionUtils.find(stored, sameNameAs(wantedAlias));
		if (alreadyStored == null) {
			accessionAliasRepository.save(wantedAlias);
		}
	}
}

/**
 * Builds a predicate matching aliases whose name equals the name of {@code reference}
 * (null-safe comparison).
 */
private static Predicate<AccessionAlias> sameNameAs(final AccessionAlias reference) {
	return new Predicate<AccessionAlias>() {
		@Override
		public boolean evaluate(AccessionAlias candidate) {
			return StringUtils.equals(candidate.getName(), reference.getName());
		}
	};
}
/**
 * Deletes the aliases with the given ids, one repository call per id.
 *
 * @param toRemove ids of the aliases to delete
 */
@Override
@Transactional
public void removeAliases(Set<Long> toRemove) {
	for (final Long aliasId : toRemove) {
		accessionAliasRepository.delete(aliasId);
	}
}
}
......@@ -28,6 +28,7 @@ import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.genesys2.server.model.genesys.AccessionAlias;
import org.genesys2.server.service.SearchService;
import org.hibernate.search.SearchException;
import org.hibernate.search.jpa.FullTextEntityManager;
......@@ -69,6 +70,9 @@ public class SearchServiceImpl implements SearchService {
lucenePage.setTotalElements(query.getResultSize());
@SuppressWarnings("unchecked")
List<Object> data = query.setMaxResults(pageSize).setFirstResult(page * pageSize).getResultList();
prefetch(data);
lucenePage.setContent(data);
return lucenePage;
......@@ -78,6 +82,17 @@ public class SearchServiceImpl implements SearchService {
}
}
/**
 * Touches the accession of every {@link AccessionAlias} in the result list.
 * <p>
 * NOTE(review): presumably this forces the accession association to load before the page is
 * handed to the view — confirm. Aliases without an accession are skipped.
 *
 * @param data search results; entries that are not aliases are ignored
 */
private void prefetch(List<Object> data) {
	for (Object o : data) {
		if (o instanceof AccessionAlias) {
			AccessionAlias alias = (AccessionAlias) o;
			// Explicit null check instead of catching NullPointerException
			if (alias.getAccession() != null) {
				// Result is deliberately ignored; only the access matters
				alias.getAccession().getCreatedBy();
			}
		}
	}
}
/**
* @param searchQuery
* @return
......
......@@ -17,9 +17,18 @@
package org.genesys2.server.servlet.controller;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.genesys2.server.persistence.domain.GenesysLowlevelRepository;
import org.genesys2.server.service.CountryNamesUpdater;
import org.genesys2.server.service.GenesysService;
import org.genesys2.server.service.GeoService;
......@@ -31,6 +40,7 @@ import org.genesys2.server.service.worker.InstituteUpdater;
import org.genesys2.server.service.worker.SGSVInsertMissing;
import org.genesys2.server.service.worker.SGSVUpdate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
......@@ -71,6 +81,9 @@ public class AdminController {
@Autowired
private InstituteService instituteService;
@Autowired
private GenesysLowlevelRepository genesysLowlevelRepository;
@RequestMapping("/")
public String root() {
return "/admin/index";
......@@ -144,6 +157,120 @@ public class AdminController {
return "redirect:/admin/";
}
/**
 * Converts the legacy accession names to aliases and then removes duplicate aliases.
 * <p>
 * Step 1 loads all rows of the legacy names table and upserts their aliases. Step 2 walks all
 * aliases ordered by accession id and, among aliases of the same accession sharing a name,
 * keeps only the best-scoring one (see {@code score}); the others are deleted.
 *
 * @return redirect to the admin index page
 */
@RequestMapping(method = RequestMethod.POST, value = "/convertNames")
public String convertNames() {
	// Convert {@link AllAccenames} to Aliases
	// NOTE(review): rows are buffered before upserting, presumably because the streaming
	// result set must be read completely before other statements can run — confirm.
	final List<Object[]> list = new ArrayList<Object[]>(100000);
	genesysLowlevelRepository.listAccessionsAccenames(new RowCallbackHandler() {
		@Override
		public void processRow(ResultSet rs) throws SQLException {
			long accessionId = rs.getLong(1);
			String acceNames = rs.getString(2);
			String otherIds = rs.getString(3);
			list.add(new Object[] { accessionId, acceNames, otherIds });
			if (list.size() % 10000 == 0) {
				LOG.info("Loaded names: " + list.size());
			}
		}
	});

	int i = 0;
	for (Object[] o : list) {
		if (++i % 1000 == 0) {
			LOG.info("Conversion progress " + i + " of " + list.size());
		}
		genesysService.upsertAliases((long) o[0], (String) o[1], (String) o[2]);
	}
	list.clear();

	// Ids of the duplicate aliases found in step 2
	final Set<Long> toRemove = new HashSet<Long>();

	/**
	 * Groups the alias rows by accession (rows arrive ordered by accession id) and records the
	 * ids of duplicate aliases in {@code toRemove}.
	 */
	class DuplicateAliasCollector implements RowCallbackHandler {
		private long prevAccnId = -1;
		// Each entry: { id, instCode, name, aliasType, lang }
		private final List<Object[]> aliases = new ArrayList<Object[]>(10);

		@Override
		public void processRow(ResultSet rs) throws SQLException {
			// n.accessionId, n.instCode, n.name, n.aliasType, n.lang,
			// n.version, n.id
			final long accessionId = rs.getLong(1);
			if (prevAccnId != -1 && prevAccnId != accessionId) {
				// A new accession starts: process the group collected so far
				flush();
			}
			prevAccnId = accessionId;
			aliases.add(new Object[] { rs.getLong(7), rs.getString(2), rs.getString(3), rs.getInt(4), rs.getString(5) });
		}

		/**
		 * Processes the buffered group. Must also be called once after the last row, otherwise
		 * the aliases of the final accession are never inspected.
		 */
		void flush() {
			if (!aliases.isEmpty()) {
				cleanup(prevAccnId, aliases);
				aliases.clear();
			}
		}

		/** Compares every pair of aliases of one accession and marks the losers for removal. */
		private void cleanup(long accessionId, List<Object[]> existingAliases) {
			for (int i = 0; i < existingAliases.size() - 1; i++) {
				Object[] name1 = existingAliases.get(i);
				if (toRemove.contains(name1[0])) {
					continue;
				}
				for (int j = i + 1; j < existingAliases.size(); j++) {
					Object[] name2 = existingAliases.get(j);
					if (toRemove.contains(name2[0])) {
						continue;
					}
					int res = whatToKeep(name1, name2);
					if (res == -1) {
						markForRemoval(accessionId, name1);
					} else if (res == 1) {
						markForRemoval(accessionId, name2);
					}
				}
			}
		}

		/** Records the alias id for deletion. */
		private void markForRemoval(long accessionId, Object[] alias) {
			if (LOG.isDebugEnabled()) {
				LOG.debug("Duplicate alias of accession " + accessionId + ": " + ArrayUtils.toString(alias, "NULL"));
			}
			toRemove.add((long) alias[0]);
		}

		/**
		 * @return 0 when the names differ, -1 when {@code name1} should be removed, 1 when
		 *         {@code name2} should be removed (also on equal scores)
		 */
		private int whatToKeep(Object[] name1, Object[] name2) {
			if (StringUtils.equals((String) name1[2], (String) name2[2])) {
				float score1 = score(name1), score2 = score(name2);
				if (score1 < score2) {
					return -1;
				} else {
					return 1;
				}
			} else {
				return 0;
			}
		}

		/**
		 * Higher score means "more worth keeping": aliases with an institute code rank above
		 * those without. NOTE(review): 5 and 0 are alias type ids — confirm against AliasType.
		 */
		private float score(Object[] name1) {
			float score = 1.0f;
			if (name1[1] != null) {
				score += 2;
				if ((int) name1[3] == 5) {
					score *= 2;
				}
			} else {
				if ((int) name1[3] == 0) {
					score += 1;
				}
			}
			return score;
		}
	}

	final DuplicateAliasCollector collector = new DuplicateAliasCollector();
	genesysLowlevelRepository.listAccessionsAlias(collector);
	// The last accession has no following row to trigger its cleanup
	collector.flush();

	LOG.info("Removing duplicate aliases: " + toRemove.size());
	this.genesysService.removeAliases(toRemove);
	return "redirect:/admin/";
}
@RequestMapping(method = RequestMethod.POST, value = "/sanitize")
public String sanitize() {
LOG.info("Sanitizing content");
......
......@@ -48,6 +48,9 @@
<form method="post" action="<c:url value="/admin/updateAccessionInstituteRefs" />">
<input type="submit" class="btn btn-default" value="Update accession institute info" />
</form>
<form method="post" action="<c:url value="/admin/convertNames" />">
<input type="submit" class="btn btn-default" value="Convert old names to aliases" />
</form>
<h3>C&E</h3>
<form method="post" action="<c:url value="/admin/refreshMetadataMethods" />">
......
......@@ -45,6 +45,9 @@
<c:when test="${clazz eq 'Accession'}">
<a href="<c:url value="/acn/id/${searchResult.id}" />">${searchResult.accessionName}</a> ${searchResult.taxonomy.taxonName} ${searchResult.instituteCode}
</c:when>
<c:when test="${clazz eq 'AccessionAlias'}">
<a href="<c:url value="/acn/id/${searchResult.accession.id}" />">${searchResult.name}</a> ${searchResult.accession.taxonomy.taxonName} ${searchResult.accession.instituteCode}
</c:when>
<c:when test="${clazz eq 'Crop'}">
<a href="<c:url value="/c/${searchResult.shortName}/data" />">${searchResult.name}</a>
</c:when>
......
package org.genesys2.server.test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests the duplicate-alias selection rules. The helper methods are a copy of the logic used
 * by the admin "convertNames" cleanup; each alias is an array of
 * {@code { id, instCode, name, aliasType, lang }}.
 */
public class NamesTest {
	/**
	 * Among aliases sharing a name only the best-scoring one survives: id 1 (type 0) beats
	 * id 2, and id 5 (has an institute code) beats ids 3 and 4.
	 */
	@Test
	public void testCleanup() {
		List<Object[]> names = new ArrayList<Object[]>();
		names.add(new Object[] { 1l, null, "Qing ke dou", 0, null });
		names.add(new Object[] { 2l, null, "Qing ke dou", 5, null });
		names.add(new Object[] { 3l, null, "ZDD05417", 5, null });
		names.add(new Object[] { 4l, null, "ZDD05417", 5, null });
		names.add(new Object[] { 5l, "BLAHBLA", "ZDD05417", 5, null });

		Set<Long> toRemove = cleanup(names);

		Assert.assertEquals(new HashSet<Long>(Arrays.asList(2L, 3L, 4L)), toRemove);
	}

	/**
	 * Compares every pair of aliases and collects the ids of the losing duplicates.
	 *
	 * @return ids of the aliases that should be removed
	 */
	private Set<Long> cleanup(List<Object[]> names) {
		Set<Long> toRemove = new HashSet<Long>();
		for (int i = 0; i < names.size() - 1; i++) {
			Object[] name1 = names.get(i);
			if (toRemove.contains(name1[0])) {
				continue;
			}
			for (int j = i + 1; j < names.size(); j++) {
				Object[] name2 = names.get(j);
				if (toRemove.contains(name2[0])) {
					continue;
				}
				int res = whatToKeep(name1, name2);
				if (res == -1) {
					toRemove.add((long) name1[0]);
				} else if (res == 1) {
					toRemove.add((long) name2[0]);
				}
			}
		}
		return toRemove;
	}

	/**
	 * @return 0 when the names differ, -1 when {@code name1} should be removed, 1 when
	 *         {@code name2} should be removed (also on equal scores)
	 */
	private int whatToKeep(Object[] name1, Object[] name2) {
		if (StringUtils.equals((String) name1[2], (String) name2[2])) {
			float score1 = score(name1), score2 = score(name2);
			if (score1 < score2) {
				return -1;
			} else {
				return 1;
			}
		} else {
			return 0;
		}
	}

	/** Higher score means "more worth keeping"; an institute code outranks a bare name. */
	private float score(Object[] name1) {
		float score = 1.0f;
		if (name1[1] != null) {
			score += 2;
			if ((int) name1[3] == 5) {
				score *= 2;
			}
		} else {
			if ((int) name1[3] == 0) {
				score += 1;
			}
		}
		return score;
	}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment