From 995fd77aa4a3715616604e82f36a691872f3fda9 Mon Sep 17 00:00:00 2001 From: Matija Obreza Date: Wed, 4 Sep 2019 11:29:41 +0200 Subject: [PATCH] Added functions to check taxonomic names and geo data using Genesys Validator --- DESCRIPTION | 5 ++- NAMESPACE | 5 +++ R/validator.R | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 R/validator.R diff --git a/DESCRIPTION b/DESCRIPTION index e22da90..bd18fe1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: genesysr -Version: 0.9.3 +Version: 0.9.4 Title: Genesys PGR Client Description: Access data on plant genetic resources from genebanks around the world published on Genesys (). Your use of data is subject to terms and conditions available at . @@ -10,7 +10,8 @@ Maintainer: Matija Obreza Depends: R (>= 3.1.0) Imports: httr, - jsonlite + jsonlite, + dplyr License: Apache License 2.0 RoxygenNote: 6.1.1 URL: https://gitlab.croptrust.org/genesys-pgr/genesysr diff --git a/NAMESPACE b/NAMESPACE index f04dcac..0436cbf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,9 @@ export(api1_url) export(api_url) export(authorization) +export(check_country) +export(check_landorsea) +export(check_taxonomy) export(client_login) export(download_mcpd) export(download_pdci) @@ -18,3 +21,5 @@ export(setup_production) export(setup_sandbox) export(user_login) importFrom(utils,browseURL) +importFrom(utils,read.csv) +importFrom(utils,write.table) diff --git a/R/validator.R b/R/validator.R new file mode 100644 index 0000000..2b3f42b --- /dev/null +++ b/R/validator.R @@ -0,0 +1,109 @@ + +#' Check MCPD taxonomic data (GENUS, SPECIES, SPAUTHOR, SUBTAXA, SUBTAUTHOR) +#' using https://validator.genesys-pgr.org. +#' +#' Duplicate input rows are removed using dplyr::distinct() and results are +#' returned for unique rows. +#' +#' @param mcpd Accession passport data in MCPD format +#' @param toCurrentTaxa Should obsoleted names be reported? 
+#'
+#' @examples
+#' \dontrun{
+#'   taxaCheck <- genesysr::check_taxonomy(mcpd)
+#' }
+#'
+#' @return A data frame parsed from the validator's tab-separated response,
+#'   returned invisibly.
+#' @export
+check_taxonomy <- function(mcpd, toCurrentTaxa = FALSE) {
+  # Upload each distinct row only once.
+  unique_rows <- dplyr::distinct(mcpd)
+
+  # The taxonomy request carries toCurrentTaxa and no validateType field.
+  invisible(
+    .validator_process(unique_rows, list(toCurrentTaxa = toCurrentTaxa))
+  )
+}
+
+#' Run Land-or-Sea check on MCPD data using https://validator.genesys-pgr.org.
+#' Uploads only rows where DECLATITUDE and DECLONGITUDE are provided.
+#' In practice it is better to use `check_country` if ORIGCTY data exists.
+#'
+#' @param mcpd Accession passport data in MCPD format. Must contain the
+#'   columns DECLATITUDE and DECLONGITUDE.
+#'
+#' @examples
+#' \dontrun{
+#'   waterCheck <- genesysr::check_landorsea(mcpd)
+#' }
+#'
+#' @return A data frame parsed from the validator's tab-separated response,
+#'   returned invisibly.
+#' @export
+check_landorsea <- function(mcpd) {
+  has_coords <- .validator_provided(mcpd, c("DECLATITUDE", "DECLONGITUDE"))
+
+  invisible(
+    .validator_process(
+      mcpd[has_coords, , drop = FALSE],
+      list(validateType = "landorsea")
+    )
+  )
+}
+
+#' Run the country check (validateType "country") on MCPD data using
+#' https://validator.genesys-pgr.org. Uploads only rows where ORIGCTY,
+#' DECLATITUDE and DECLONGITUDE are provided.
+#'
+#' @param mcpd Accession passport data in MCPD format. Must contain the
+#'   columns ORIGCTY, DECLATITUDE and DECLONGITUDE.
+#'
+#' @examples
+#' \dontrun{
+#'   geoCheck <- genesysr::check_country(mcpd)
+#' }
+#'
+#' @return A data frame parsed from the validator's tab-separated response,
+#'   returned invisibly.
+#' @export
+check_country <- function(mcpd) {
+  required <- c("ORIGCTY", "DECLATITUDE", "DECLONGITUDE")
+  has_values <- .validator_provided(mcpd, required)
+
+  # NA is uploaded as an empty field (na = ""), so an empty ORIGCTY string is
+  # indistinguishable from a missing one and is treated as not provided.
+  # as.character() makes the test work for factor columns as well.
+  has_country <- nzchar(as.character(mcpd[["ORIGCTY"]]))
+
+  invisible(
+    .validator_process(
+      mcpd[has_values & has_country, , drop = FALSE],
+      list(validateType = "country")
+    )
+  )
+}
+
+#' Row mask: TRUE where every listed column holds a non-NA value.
+#'
+#' @param mcpd Accession passport data in MCPD format
+#' @param columns Names of the columns that must be present and non-NA
+#'
+#' @return A logical vector with one element per row of `mcpd`. Stops with an
+#'   error naming the absent columns when `mcpd` lacks any of `columns`.
+#' @noRd
+.validator_provided <- function(mcpd, columns) {
+  absent <- setdiff(columns, names(mcpd))
+  if (length(absent) > 0) {
+    stop(
+      "mcpd is missing required column(s): ",
+      paste(absent, collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  Reduce(`&`, lapply(columns, function(column) !is.na(mcpd[[column]])))
+}
+
+#' Upload a data frame to the Genesys Validator and parse its response.
+#'
+#' Writes `data` as tab-separated UTF-8 text, POSTs it as the `csvText` field
+#' of a multipart request to https://validator.genesys-pgr.org/process and
+#' reads the tab-separated response body back into a data frame.
+#'
+#' @param data Data frame to upload
+#' @param fields Named list of request-specific form fields, sent ahead of
+#'   the common `separator`, `decimalMark`, `encoding` and `csvText` fields
+#'
+#' @return A data frame read from the response. Stops with an HTTP error when
+#'   the validator does not answer with a success status.
+#' @importFrom utils read.csv write.table
+#' @noRd
+.validator_process <- function(data, fields) {
+  upload <- tempfile(pattern = "validator-upload")
+  result <- tempfile(pattern = "validator-result")
+  # Remove both temporary files even when the request or the parsing fails.
+  on.exit(unlink(c(upload, result)), add = TRUE)
+
+  # The request announces UTF-8, so the upload must really be UTF-8.
+  write.table(
+    data, file = upload, row.names = FALSE, dec = ".", sep = "\t",
+    quote = FALSE, na = "", fileEncoding = "UTF-8"
+  )
+  # file.info() reports bytes, hence useBytes = TRUE for the read.
+  csv_text <- readChar(upload, file.info(upload)$size, useBytes = TRUE)
+  Encoding(csv_text) <- "UTF-8"
+
+  response <- httr::POST(
+    "https://validator.genesys-pgr.org/process",
+    body = c(
+      fields,
+      list(
+        separator = "\t", decimalMark = ".",
+        encoding = "UTF-8", csvText = csv_text
+      )
+    ),
+    encode = "multipart",
+    httr::accept("text/csv"),
+    httr::write_disk(result)
+  )
+  # Do not parse an error page as if it were validator output.
+  httr::stop_for_status(response)
+
+  read.csv(result, fileEncoding = "UTF-8", sep = "\t")
+}
-- 
GitLab