Commit 995fd77a authored by Matija Obreza's avatar Matija Obreza

Added functions to check taxonomic names and geo data using Genesys Validator

parent e33bb826
Package: genesysr
Version: 0.9.3
Version: 0.9.4
Title: Genesys PGR Client
Description: Access data on plant genetic resources from genebanks around the world published on Genesys (<https://www.genesys-pgr.org>).
Your use of data is subject to terms and conditions available at <https://www.genesys-pgr.org/content/legal/terms>.
......@@ -10,7 +10,8 @@ Maintainer: Matija Obreza <matija.obreza@croptrust.org>
Depends: R (>= 3.1.0)
Imports:
httr,
jsonlite
jsonlite,
dplyr
License: Apache License 2.0
RoxygenNote: 6.1.1
URL: https://gitlab.croptrust.org/genesys-pgr/genesysr
......
......@@ -3,6 +3,9 @@
export(api1_url)
export(api_url)
export(authorization)
export(check_country)
export(check_landorsea)
export(check_taxonomy)
export(client_login)
export(download_mcpd)
export(download_pdci)
......@@ -18,3 +21,5 @@ export(setup_production)
export(setup_sandbox)
export(user_login)
importFrom(utils,browseURL)
importFrom(utils,read.csv)
importFrom(utils,write.table)
#' Check MCPD taxonomic data (GENUS, SPECIES, SPAUTHOR, SUBTAXA, SUBTAUTHOR)
#' using https://validator.genesys-pgr.org.
#'
#' Duplicate input rows are removed using dplyr::distinct() and results are
#' returned for unique rows.
#'
#' The unique rows are serialized as tab-separated text and POSTed to the
#' validator's `/process` endpoint. The tab-separated response is written to a
#' temporary file and parsed with `read.csv()`. Both temporary files are
#' removed when the function exits, including on error.
#'
#' @param mcpd Accession passport data in MCPD format
#' @param toCurrentTaxa Should obsoleted names be reported?
#'
#' @examples
#' \dontrun{
#'   taxaCheck <- genesysr::check_taxonomy(mcpd)
#' }
#'
#' @return Results from validator as a data frame, returned invisibly.
#'   An error is raised if the validator responds with an HTTP error status.
#' @export
#' @importFrom utils read.csv write.table
check_taxonomy <- function(mcpd, toCurrentTaxa = FALSE) {
  unique_rows <- dplyr::distinct(mcpd)

  request_file <- tempfile(pattern = "file", tmpdir = tempdir(), fileext = "")
  response_file <- tempfile(pattern = "file", tmpdir = tempdir(), fileext = "")
  # Remove both temporary files even when the request or the parsing fails.
  # unlink() is silent for files that were never created.
  on.exit(unlink(c(request_file, response_file)), add = TRUE)

  write.table(
    unique_rows,
    file = request_file, row.names = FALSE, dec = ".", sep = "\t",
    quote = FALSE, na = ""
  )

  # The toCurrentTaxa field is always sent with the request.
  response <- httr::POST(
    "https://validator.genesys-pgr.org/process",
    body = list(
      toCurrentTaxa = toCurrentTaxa,
      separator = "\t", decimalMark = ".",
      encoding = "UTF-8",
      csvText = readChar(request_file, file.info(request_file)$size)
    ),
    encode = "multipart",
    httr::accept("text/csv"),
    httr::write_disk(response_file)
  )
  # Fail loudly instead of parsing an HTTP error body as if it were results.
  httr::stop_for_status(response)

  result <- read.csv(response_file, fileEncoding = "UTF-8", sep = "\t")
  invisible(result)
}
#' Run Land-or-Sea check on MCPD data using https://validator.genesys-pgr.org.
#' Uploads only rows where DECLATITUDE and DECLONGITUDE are provided.
#' In practice it is better to use `check_country` if ORIGCTY data exists.
#'
#' The selected rows are serialized as tab-separated text and POSTed to the
#' validator's `/process` endpoint with `validateType = "landorsea"`. The
#' tab-separated response is written to a temporary file and parsed with
#' `read.csv()`. Both temporary files are removed when the function exits,
#' including on error.
#'
#' @param mcpd Accession passport data in MCPD format
#'
#' @examples
#' \dontrun{
#'   waterCheck <- genesysr::check_landorsea(mcpd)
#' }
#'
#' @return Results from validator as a data frame, returned invisibly.
#'   An error is raised if the validator responds with an HTTP error status.
#' @export
#' @importFrom utils read.csv write.table
check_landorsea <- function(mcpd) {
  # Keep only rows that carry both coordinates.
  with_coordinates <- dplyr::filter(
    mcpd,
    !is.na(mcpd$DECLATITUDE) & !is.na(mcpd$DECLONGITUDE)
  )

  request_file <- tempfile(pattern = "landorsea", tmpdir = tempdir(), fileext = "")
  response_file <- tempfile(pattern = "landorsea", tmpdir = tempdir(), fileext = "")
  # Remove both temporary files even when the request or the parsing fails.
  # unlink() is silent for files that were never created.
  on.exit(unlink(c(request_file, response_file)), add = TRUE)

  write.table(
    with_coordinates,
    file = request_file, row.names = FALSE, dec = ".", sep = "\t",
    quote = FALSE, na = ""
  )

  response <- httr::POST(
    "https://validator.genesys-pgr.org/process",
    body = list(
      validateType = "landorsea",
      separator = "\t", decimalMark = ".",
      encoding = "UTF-8",
      csvText = readChar(request_file, file.info(request_file)$size)
    ),
    encode = "multipart",
    httr::accept("text/csv"),
    httr::write_disk(response_file)
  )
  # Fail loudly instead of parsing an HTTP error body as if it were results.
  httr::stop_for_status(response)

  result <- read.csv(response_file, fileEncoding = "UTF-8", sep = "\t")
  invisible(result)
}
#' Run the country check on MCPD data using https://validator.genesys-pgr.org.
#' Uploads only rows where ORIGCTY, DECLATITUDE and DECLONGITUDE are provided.
#'
#' A row counts as having ORIGCTY when the value is neither `NA` nor an empty
#' string; character and factor columns are both accepted. The selected rows
#' are serialized as tab-separated text and POSTed to the validator's
#' `/process` endpoint with `validateType = "country"`. The tab-separated
#' response is written to a temporary file and parsed with `read.csv()`. Both
#' temporary files are removed when the function exits, including on error.
#'
#' @param mcpd Accession passport data in MCPD format. Must contain the
#'   columns ORIGCTY, DECLATITUDE and DECLONGITUDE.
#'
#' @examples
#' \dontrun{
#'   geoCheck <- genesysr::check_country(mcpd)
#' }
#'
#' @return Results from validator as a data frame, returned invisibly.
#'   An error is raised if a required column is missing or if the validator
#'   responds with an HTTP error status.
#' @export
#' @importFrom utils read.csv write.table
check_country <- function(mcpd) {
  required_columns <- c("ORIGCTY", "DECLATITUDE", "DECLONGITUDE")
  missing_columns <- setdiff(required_columns, names(mcpd))
  if (length(missing_columns) > 0) {
    stop(
      "mcpd is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # Test ORIGCTY per row. is.character() on the whole column is a single
  # TRUE/FALSE about the column type: it lets rows with missing ORIGCTY
  # through and drops every row when the column is a factor.
  origin_country <- as.character(mcpd$ORIGCTY)
  has_country <- !is.na(origin_country) & nzchar(origin_country)
  has_coordinates <- !is.na(mcpd$DECLATITUDE) & !is.na(mcpd$DECLONGITUDE)
  with_country_and_coordinates <- dplyr::filter(
    mcpd,
    has_country & has_coordinates
  )

  request_file <- tempfile(pattern = "landorsea", tmpdir = tempdir(), fileext = "")
  response_file <- tempfile(pattern = "landorsea", tmpdir = tempdir(), fileext = "")
  # Remove both temporary files even when the request or the parsing fails.
  # unlink() is silent for files that were never created.
  on.exit(unlink(c(request_file, response_file)), add = TRUE)

  write.table(
    with_country_and_coordinates,
    file = request_file, row.names = FALSE, dec = ".", sep = "\t",
    quote = FALSE, na = ""
  )

  response <- httr::POST(
    "https://validator.genesys-pgr.org/process",
    body = list(
      validateType = "country",
      separator = "\t", decimalMark = ".",
      encoding = "UTF-8",
      csvText = readChar(request_file, file.info(request_file)$size)
    ),
    encode = "multipart",
    httr::accept("text/csv"),
    httr::write_disk(response_file)
  )
  # Fail loudly instead of parsing an HTTP error body as if it were results.
  httr::stop_for_status(response)

  result <- read.csv(response_file, fileEncoding = "UTF-8", sep = "\t")
  invisible(result)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment