Commit 323bbc63 authored by Matija Obreza's avatar Matija Obreza
Browse files

Resolved #34 - Invalid WIEWS Institute URLs result in search engine

indexing errors
parent 16c4ac3b
......@@ -16,8 +16,12 @@
package org.genesys2.server.model.impl;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.persistence.Column;
......@@ -30,6 +34,7 @@ import javax.persistence.OneToMany;
import javax.persistence.Table;
import javax.persistence.UniqueConstraint;
import org.apache.commons.lang.StringUtils;
import org.genesys2.server.model.AclAwareModel;
import org.genesys2.server.model.BusinessModel;
import org.genesys2.server.model.EntityId;
......@@ -69,8 +74,8 @@ public class FaoInstitute extends BusinessModel implements GeoReferencedEntity,
@Column(length = 10)
private String vCode;
@Column(name = "codeSGSV")
private String codeSGSV;
@Column(name = "codeSGSV")
private String codeSGSV;
private boolean current;
......@@ -93,8 +98,8 @@ public class FaoInstitute extends BusinessModel implements GeoReferencedEntity,
private Double elevation;
private boolean uniqueAcceNumbs = true;
@Column(name = "allowMaterialRequests", columnDefinition = "boolean default true", nullable = false)
private boolean allowMaterialRequests = true;
@Column(name = "allowMaterialRequests", columnDefinition = "boolean default true", nullable = false)
private boolean allowMaterialRequests = true;
/**
 * No-argument constructor. The body is empty, so the instance carries only
 * the values set by the field initializers.
 */
public FaoInstitute() {
}
......@@ -247,19 +252,81 @@ public class FaoInstitute extends BusinessModel implements GeoReferencedEntity,
this.current = current;
}
/**
 * Returns the current value of the {@code allowMaterialRequests} flag.
 *
 * @return the {@code allowMaterialRequests} field
 */
public boolean isAllowMaterialRequests() {
return allowMaterialRequests;
}
/**
 * Returns the current value of the {@code allowMaterialRequests} flag.
 *
 * @return the {@code allowMaterialRequests} field
 */
public boolean isAllowMaterialRequests() {
return allowMaterialRequests;
}
/**
 * Sets the {@code allowMaterialRequests} flag.
 *
 * @param allowMaterialRequests the new value of the flag
 */
public void setAllowMaterialRequests(boolean allowMaterialRequests) {
this.allowMaterialRequests = allowMaterialRequests;
}
/**
 * Sets the {@code allowMaterialRequests} flag.
 *
 * @param allowMaterialRequests the new value of the flag
 */
public void setAllowMaterialRequests(boolean allowMaterialRequests) {
this.allowMaterialRequests = allowMaterialRequests;
}
/**
 * Returns the value of the {@code codeSGSV} field.
 *
 * @return the stored code, or {@code null} if none has been set
 */
public String getCodeSGSV() {
return codeSGSV;
}
/**
 * Sets the value of the {@code codeSGSV} field.
 *
 * @param codeSGSV the code to store
 */
public void setCodeSGSV(String codeSGSV) {
this.codeSGSV = codeSGSV;
}
/**
 * Returns the value of the {@code codeSGSV} field.
 *
 * @return the stored code, or {@code null} if none has been set
 */
public String getCodeSGSV() {
return codeSGSV;
}
/**
 * Parses the free-text {@code url} field into a list of {@link URL}s.
 * <p>
 * The field may contain several addresses separated by commas, semicolons
 * and/or whitespace. Each non-blank entry is parsed with
 * {@link URL#URL(String)}. Entries that fail to parse are repaired where
 * possible:
 * <ul>
 * <li>entries without a protocol ({@code www.example.org}) are prefixed with
 * {@code http://};</li>
 * <li>entries with an unrecognised protocol ({@code htt://www.example.org},
 * {@code www.http://example.org}) have that protocol replaced by
 * {@code http://}.</li>
 * </ul>
 *
 * @return {@code null} when the {@code url} field is blank; otherwise the
 *         parsed URLs in the order they appear in the field (the list is
 *         empty when the field contains only separators)
 * @throws MalformedURLException
 *             if any entry can neither be parsed nor repaired; the exception
 *             raised for the original entry text is the one propagated
 */
public List<URL> getUrls() throws MalformedURLException {
    if (StringUtils.isBlank(this.url)) {
        // null (not an empty list) is the established contract for "no URL recorded"
        return null;
    }

    List<URL> urls = new ArrayList<URL>();
    for (String candidate : this.url.split("[,;\\s]+")) {
        // split() yields a leading empty token when the text starts with a separator
        if (StringUtils.isBlank(candidate)) {
            continue;
        }
        urls.add(parseLenientUrl(candidate.trim()));
    }
    return urls;
}

/**
 * Parses a single address, retrying with an {@code http://} prefix when the
 * text has no protocol or an unknown one.
 *
 * @param text
 *            one trimmed, non-blank entry of the {@code url} field
 * @return the parsed (possibly repaired) URL, never {@code null}
 * @throws MalformedURLException
 *             the failure reported for the original {@code text} when it
 *             cannot be repaired
 */
private static URL parseLenientUrl(String text) throws MalformedURLException {
    try {
        return new URL(text);
    } catch (MalformedURLException e) {
        // The JDK only distinguishes these failures through the message text.
        String reason = e.getMessage();
        if (reason == null) {
            throw e;
        }

        String repaired;
        if (reason.startsWith("no protocol")) {
            repaired = "http://" + text;
        } else if (reason.startsWith("unknown protocol")) {
            int start;
            int schemeEnd = text.indexOf("://");
            if (schemeEnd >= 0) {
                start = schemeEnd + 3;
            } else {
                // No "://" (e.g. "htt:www.example.org"): cut after the first
                // ':' and any slashes instead of at an arbitrary offset.
                start = text.indexOf(':') + 1;
                while (start < text.length() && text.charAt(start) == '/') {
                    start++;
                }
            }
            if (start >= text.length()) {
                // Nothing left after the bogus protocol, so there is no host to keep
                throw e;
            }
            repaired = "http://" + text.substring(start);
        } else {
            throw e;
        }

        try {
            return new URL(repaired);
        } catch (MalformedURLException e2) {
            // Report the problem with what was actually stored, not with our guess
            throw e;
        }
    }
}
/**
 * Sets the value of the {@code codeSGSV} field.
 *
 * @param codeSGSV the code to store
 */
public void setCodeSGSV(String codeSGSV) {
this.codeSGSV = codeSGSV;
}
/**
 * Exception-free variant of {@link #getUrls()}, convenient for callers that
 * cannot handle checked exceptions.
 *
 * @return the result of {@link #getUrls()}, or {@code null} when that call
 *         throws {@link MalformedURLException}
 */
public List<URL> getSafeUrls() {
    List<URL> parsed = null;
    try {
        parsed = getUrls();
    } catch (MalformedURLException ignored) {
        // Deliberately ignored: an unparseable value is reported as null
    }
    return parsed;
}
}
......@@ -85,7 +85,9 @@
--%>
<div class="col-sm-12">
<spring:message code="faoInstitute.url" />:
<a href="<c:out value="${faoInstitute.url}" />"><span property="schema:Organization#sameAs"><c:out value="${faoInstitute.url}" /></span></a>
<c:forEach items="${faoInstitute.safeUrls}" var="url">
<a target="_blank" rel="nofollow" href="<c:out value="${url}" />"><span property="schema:Organization#sameAs"><c:out value="${url}" /></span></a>
</c:forEach>
</div>
</div>
......
/**
* Copyright 2014 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.genesys2.server.model.impl;
import static org.junit.Assert.assertTrue;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.junit.Test;
/**
 * Tests for {@link FaoInstitute#getUrls()}: splitting of the free-text URL
 * field and repair of entries with a missing or unknown protocol.
 */
public class FaoInstituteTest {

    /** A new institute has no URL, so no URL list is produced. */
    @Test
    public void testUrlsDefault() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        assertTrue(i.getUrl() == null);
        assertTrue(i.getUrls() == null);
    }

    /** An empty URL string is treated like a missing one. */
    @Test
    public void testUrlsBlank() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl("");
        assertTrue(i.getUrl() != null);
        assertTrue(i.getUrls() == null);
    }

    /** A string holding only a separator is not blank, so it yields an empty list. */
    @Test
    public void testUrlsBlank2() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl(";");
        assertTrue(i.getUrl() != null);
        assertTrue(i.getUrls() != null);
        assertTrue(i.getUrls().size() == 0);
    }

    /** A single well-formed URL yields one entry. */
    @Test
    public void testUrlsValid1() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl("https://www.iita.org");
        assertTrue(i.getUrl() != null);
        assertTrue(i.getUrls() != null);
        assertTrue(i.getUrls().size() == 1);
    }

    /** Mixed separators and a trailing separator are handled. */
    @Test
    public void testUrlsValid2() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl("https://www.iita.org; http://genebank.iita.org, ");
        assertTrue(i.getUrl() != null);
        List<URL> urls = i.getUrls();
        assertTrue(urls != null);
        assertTrue(urls.size() == 2);
    }

    /** An entry without a protocol is given the http protocol. */
    @Test
    public void testUrlsNoProtocol() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl("www.iita.org");
        assertTrue(i.getUrl() != null);
        List<URL> urls = i.getUrls();
        assertTrue(urls != null);
        assertTrue(urls.size() == 1);
        assertTrue(urls.get(0).equals(new URL("http://www.iita.org")));
    }

    /** Entries with and without a protocol can be mixed; order is preserved. */
    @Test
    public void testUrlsNoProtocol2() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl("www.iita.org,https://foo.bar;www.somehost.org");
        assertTrue(i.getUrl() != null);
        List<URL> urls = i.getUrls();
        assertTrue(urls != null);
        assertTrue(urls.size() == 3);
        assertTrue(urls.get(0).equals(new URL("http://www.iita.org")));
        assertTrue(urls.get(1).equals(new URL("https://foo.bar")));
        assertTrue(urls.get(2).equals(new URL("http://www.somehost.org")));
    }

    @Test
    // http://www.sasa.gov.uk www.scottishlandraces.org.uk
    // www.varieties.potato.org.uk www.agricrops.org
    public void testUrlsSample1() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();
        i.setUrl("http://www.sasa.gov.uk www.scottishlandraces.org.uk www.varieties.potato.org.uk www.agricrops.org");
        assertTrue(i.getUrl() != null);
        List<URL> urls = i.getUrls();
        assertTrue(urls != null);
        assertTrue(urls.size() == 4);
        assertTrue(urls.get(0).equals(new URL("http://www.sasa.gov.uk")));
        assertTrue(urls.get(1).equals(new URL("http://www.scottishlandraces.org.uk")));
        assertTrue(urls.get(2).equals(new URL("http://www.varieties.potato.org.uk")));
        assertTrue(urls.get(3).equals(new URL("http://www.agricrops.org")));
    }

    /**
     * Runs every line of the bundled URL dump through
     * {@link FaoInstitute#getUrls()} and checks that the number of parsed URLs
     * matches the number of separator-delimited entries. Lines that cannot be
     * parsed are only reported on standard error; they do not fail the test.
     */
    @Test
    public void testDump1() throws IOException {
        InputStream fis = getClass().getResourceAsStream("/org/genesys2/server/model/impl/wiews-urls.txt");
        // Fail with a clear message instead of a NullPointerException in the reader
        assertTrue("Test resource wiews-urls.txt not found on classpath", fis != null);

        BufferedReader sr = null;
        try {
            // NOTE(review): the dump is assumed to be UTF-8 encoded; confirm
            sr = new BufferedReader(new InputStreamReader(fis, "UTF-8"));

            String l = null;
            FaoInstitute i = new FaoInstitute();
            while ((l = sr.readLine()) != null) {
                int expectedUrls = StringUtils.isBlank(l) ? 0 : l.trim().split("[,;\\s]+").length;
                i.setUrl(l);
                try {
                    List<URL> urls = i.getUrls();
                    if (StringUtils.isBlank(l)) {
                        assertTrue(urls == null);
                    } else {
                        assertTrue(urls != null);
                        assertTrue(urls.size() == expectedUrls);
                    }
                } catch (MalformedURLException e) {
                    System.err.println(e.getMessage());
                    System.err.println(l);
                    System.err.println(Arrays.asList(l.trim().split("[,;\\s]+")));
                }
            }
        } finally {
            // Release the resource even when an assertion above fails
            IOUtils.closeQuietly(sr);
            IOUtils.closeQuietly(fis);
        }
    }

    /** Entries with a bogus protocol prefix are rewritten to plain http. */
    @Test
    public void testFunny1() throws MalformedURLException {
        FaoInstitute i = new FaoInstitute();

        i.setUrl("www.http://ongrc.org");
        List<URL> urls = i.getUrls();
        assertTrue(urls.get(0).equals(new URL("http://ongrc.org")));

        i.setUrl("htt://www.conagebio.go.cr");
        urls = i.getUrls();
        assertTrue(urls.get(0).equals(new URL("http://www.conagebio.go.cr")));
    }
}
www.sugarcane-breeding-tri.nic.in
www.dofps.gov.bt
http://www.pgrvietnam.org.vn/
www.sumsel.litbang.deptan.go.id
http://www.dicta.hn/norte-y-valles-de-olancho.html
www.sehiveca.com
www.usaid.gov/rw
http://www.cordes-apfel.de
www.condesluz.org.ve
uplb.edu.ph
moe.gov.kh
www.deptan.go.id
http://fipah.org/vallecio.htm
http://www.ars-grin.gov/npgs
www.agridept.gov.lk
http://www.ariel.ac.il/research/rd/about-rd
http://www.squ.edu.om/sci/index.html
http://www.nsseme.com
http//www.chequa.com.vn
http://uplb.edu.ph
http://www.mrizp.co.yu
http://www.fhia.org.hn/
www.balittanah.litbang.deptan.go.id
www.undp.mn
http://www.inta.gov.ar/laconsulta
www.inia.gob.ve
http://www.cita-aragon.es
www.fundacite-Mérida.gob.ve
http://foprideh.org/index.php?option=com_sobi2&Itemid=38
http://www.mides.gob.pa
agridept.gov.lk
www.mai.org
http://www.disaa.unimi.it/ecm/home
www.ipigri.cgiar.org/regions/cwana/regional_networks/central_asia.htm
http://www.ijv.life.ku.dk/faggr/afgrode/pometet.aspx
www.greensl.net
www.cri.lk
http://www.arap.gob.pa
http://www.cimmyt.org/
punjab.gov.pk
http://www.anam.gob.pa
www.agr.ucv.ve
www.moa.gov.bt
http://www.inia.gob.ve
www.inia.gob.ve
http://www.fundespa.org
www.bsu.edu.ph
http://www.dicta.hn/occidental.html
www.biogen.litbang.deptan.go.id
http://www.icvv.es
marnamazonas@gmail.com
www.dnp.go.th
www.fibv.org.ve
http://www.mapya.es
http://www.ias.csic.es
http://www.fundacana.org.ve
http://www.patronatodenutricion.org
http://www.grupocalesa.com
www.puce.edu.ec
www.bsu.edu.ph
http://www.juntadeandalucia.es/agriculturaypesca/ifapa/servlet/FrontController?ec=chipiona
http://www.ccbat.es
http://www.sobs.soton.ac.uk
http://www2.warwick.ac.uk/fac/sci/lifesci/acrc/gru
http://www.ivia.es/sdta/prin14.htm
http://www.agrinetguyana.org.gy/nari1/index.htm
http://www.wwf.de
www.kws.org
http://www.niab.com
www.HegeSaat.de
www.risoe.dk
www.natureandculture.org
http://www.tenerife.es
http://www.sempra.cz
http://www.cebas.csic.es/
http://www.metla.fi
www.cardi.org.kh
www.bot.ku.dk
http://www.pdir.dk
http://www.mtt.fi/english/
http://www.genres.de/bosr/
http://www.sasa.gov.uk www.scottishlandraces.org.uk www.varieties.potato.org.uk www.agricrops.org
www.medioambiente.gov.do
http://www.bordeaux.inra.fr/urefv/
www.daehnfeldt.dk
http://www.vupp.cz
http://www.parc.gov.pk/pgri.html
www.maba-eg.com
http://www.icia.es
http://www.uasd.edu.do
www.sejet.dk
http://www.inta.gov.ar/sanjuan
http://www.uah.es
www.syngenta-seeds.de
http://www.surfuturo.org
http://www.mca.gov.py/inicio.html
http://www.cmssa.cz
http://www.inia.es
http://www.uni-bonn.de/ilb/
http://www.gobcantabria.es
www.puce.edu.ec
http://www.inbar.int/LA_Office/default.htm
http://www.weinheim.de/rundgang/hermannshof.html
http://www.ual.es
http://www.agraria.unimore.it/on-line/Home.html
www.inta.gov.ar
www.norika.de
http://www.biw.kuleuven.be/DTP/TRO/_data/home.htm
http://www.spezialkulturen.at/offiziell
http://www.itga.com
http://www.parquesnacionales.gov.ar
http://www.boomgaardenstichting.be
www.agrecolandes.org
www.fobomade.org.bo
http://www.inta.gov.ar/balcarce
http://www.tu-bs.de/institute/botanik/Bot.Garten.html
http://ccs.brrd.in.th/web/
http://www.bio.bas.bg/botany
www.daebd.org
http://www.uni-bayreuth.de/obg/
http://www.iiu.edu.my/sci
http://www.oocities.com/ryfcrc
www.deptan.go.id
http://www.mlmupc.gov.kh
http://www.witzenhausen.eu
www.cricyt.edu.ar
www.unrc.edu.ar
http://www.gf.vu.lt/depts/botgen.html
http://www.interior.gov.kh
www.fdta-valles.org
http://www.fontagro.org
http://www.panda.org/
www.cardi.org.kh
www.area.gov.ye
http://www.nbpgr.ernet.in
http://www.upea.edu.bo/index.php?option=com_content&view=article&id=92&Itemid=99
http://www.camnet.com.kh/moe
http://www.treeseedfa.org
http://www.minirena.gov.rw/
agridept.gov.lk
http://www.gembloux.ulg.ac.be/pc
http://www.sibta.gov.bo
http://www.searca.org
http://www.inia.gob.pe
http://www.cnpms.embrapa.br
http://ie-umsa.edu.bo/flora/contacts.html
www.fundacionpatino.org
http://www.dicta.hn/el-guanacaste.html
http://www.inta.gov.ar
http://www.clima.uwa.edu.au/
http://www.jki.bund.de/nn_806742/DE/Institute/JKI__ZR__W/ZR__node.html__nnn=true
http://www.belsad.by
http://www.wetlands.org
http://www.iniaf.gob.bo/index.php/en/2012-09-17-22-45-34/2012-10-17-14-28-12/toralapa
http://www.pnsa.edu.kh/
http://www.agric.wa.gov.au/index.htm
http://www.pi.csiro.au
www.prefecturacochabamba.gov.bo
http://www.copa-cogeca.eu
www.area.gov.ye
http://www.grdc.com.au
http://plantbreeding.boku.ac.at/
http://www.nre.vic.gov.au
http://www.uni-leipzig.de/presse2002/apotheker.html
www.agridep.gov.lk
http://www.plantcol.be
www.sagpya.gov.ar www.inase.gov.ar
http://www.forst.tu-dresden.de/Fgarten/fgarten.htm
http://www.koeln-digital.de/parks/flora.htm
http://www.augsburg.de/Seiten/augsburg_d/sport/botan_garten/botan_garten_u/botan_a.shtml
http://www.lebensministerium.at/
www.sagpya.gov.ar www.inase.gov.ar
http://www.uni-erlangen.de/botanischer-garten
www.redesma.org
www.agrobolivia.gov.bo
www.agrobolivia.gov.bo
http://www.bdp-online.de/
http://(http://www.nfv.gwdg.de)
http://www.biologie.uni-ulm.de/extern/guenterstal/index.html
http://www.stmlf.bayern.de/lbp/hopfen/
http://www.saxonet.de/Botanischer_Garten/
www.agrobolivia.gov.bo
www.facfor.unam.edu.ar
http://www.ea.gov.au
http://www.wuppertal.de/bgw
http://www.proinpa.org
www.cabmin.gov.az
www.aopeb.org
punjab.gov.pk
www.syngentafoundation.org
http://www.wilhelmshaven.de/tourist/index.asp?Auswahl=8&Anzeige=800&Nr=18
punjab.gov.pk
www.regionpiura.gob.pe
www.oauife.edu.ng
www.moa.gov.bd
www.unc.edu.pe
www.prousodiversitas.entitatsbcn.net
www.comunidadandina.org
http://www.fontagro.org/
http://www.upm.edu.my/
www.inta.gov.ar/mercedes
www.iiap.org.pe
http://www.cgn.wur.nl
punjab.gov.pk
http://www.agruni.edu.ge
www.ier.ml
www.unc.edu.pe
www.regionarequipa.gob.pe
http://www.ipgri.cgiar.org/regions/apo/
www.apen.org.ni
punjab.gov.pk
www.acicafoc.net
www.eauxetforets.gov.ma
http://www-bgard.sci.kun.nl/bgard/
http://www.mpob.gov.my
www.inia.gob.pe
www.inia.gob.pe
http://takelaka.dts.mg/fofifa
http://takelaka.dts.mg/fofifa
http://www.ukm.my/ukmportal/akademik/fakulti/fst.htm
www.senasa.gob.pe
http://www.ukm.my/ukmportal/akademik/fakulti/fst.htm
http://www.koko.gov.my
http://www.mardi.my
www.fao.org
www.yasadngo.org
www.unsm.edu.pe
www.inia.gob.pe
iiap.org.pe
www.inia.gob.pe
punjab.gov.pk
http://ecologia.uat.mx
www.unt.edu.ar
www.aca.com.uy
www.unheval.edu.pe
www.inia.gob.pe
http://www.squ.edu.om/sci/Biol/index.html
www.maf.org.om
http://www.mgap.gub.uy/Forestal/DGF.htm
www.inase.org.uy
www.bjri.gov.bd
http://www.csrl.ars.usda.gov/nrgeec/nrgeec.html
http://www.macfdn.org
www.fao.org
http://www.procisur.org.uy/online/inicial.asp
http://www.ars-grin.gov/ars/PacWest/Pullman/GenStock/pea/MyHome.html
www.inia.org.uy
www.fagro.edu.uy
www.ipigri.cgiar.org/regions/cwana/regional_networks/central_asia.htm
http://www.secid.org
http://www.fagro.edu.uy/~crs/
www.mgap.gub.uy
www.rialseeds.co.ug
www.cmu.ac.th
Www.pgrvietnam.org.vn
www.dnp.go.th
www.msu.ac.th
www.mrree.gub.uy
http://www.uky.edu/Ag/Agronomy/Department/CloverGC
http://www.kku.ac.th
http://www.ipst.ac.th
www.doae.go.th.
www.inase.org.uy
http://www.nazillipamuk.gov.tr
www.mai-yemen.org
www.doae.go.th
webmaster@bar.gov.ph
http://www.fc.up.pt/
www.dinama.gub.uy
http://www.medioambiente.gov.ar/biodiversidad
http://www.dgadr.pt/
http://www.ceuta.org.uy
http://www2.iict.pt/index.php?idc=41
http://www.nysaes.cornell.edu/
http://www.arboretum.ch/
www.agr.una.py
http://www.uc.pt/iav
http://www.centa.gob.sv
http://www.mne.gov.pt
http://www.quirino.gov.ph
http://www.centa.gob.sv
http://www.draro.pt/
http://www.op.gov.ph/
www.dswd.gov.ph
http://www.centa.gob.sv
http://ptn.brrd.in.th/web/
www.seam.gov.py
www.doa.go.th
www.doa.go.th
http://www.iict.pt/estrutura/vest01.asp?dep=22
www.seam.gov.py
http://agri10.norminet.org.ph/
www.regiontumbes.gob.pe
www.inrena.gob.pe
wwww.farlopsur.com
http://www.inia.min-agricultura.pt
www.ird.sn
http://www.agrosel.ro
www.cipotato.org
http://www.associacaopato.org/