...
 
Commits (5)
......@@ -5,3 +5,4 @@
man
README.Rmd
cran-comments.md
inst/doc
......@@ -15,3 +15,6 @@ License: Apache License 2.0
RoxygenNote: 6.0.1
URL: https://gitlab.croptrust.org/genesys-pgr/genesysr
BugReports: https://gitlab.croptrust.org/genesys-pgr/genesysr/issues
Suggests: knitr,
rmarkdown
VignetteBuilder: knitr
......@@ -77,9 +77,9 @@ authorization <- function(authorization) {
}
#' Ensure that environment has OAuth token
check_auth <- function() {
.check_auth <- function() {
if (is.null(.genesysEnv$Authorization)) {
stop("You must first authorize with Genesys with user_login or client_login.");
warning("You must first authorize with Genesys with user_login() or client_login(...).");
}
}
......@@ -155,8 +155,8 @@ client_login <- function() {
}
api_call <- function(path, method = "get") {
check_auth()
.api_call <- function(path, method = "get") {
.check_auth()
resp <- httr::GET(api_url(path), httr::add_headers(
Authorization = .genesysEnv$Authorization
)
......@@ -184,8 +184,8 @@ api_url <- function(path) {
paste0(.genesysEnv$server, "/api/v0", path)
}
get <- function(path, query = NULL) {
check_auth()
.get <- function(path, query = NULL) {
.check_auth()
resp <- httr::GET(api_url(path), query = query, httr::add_headers(
Authorization = .genesysEnv$Authorization
))
......@@ -203,8 +203,8 @@ get <- function(path, query = NULL) {
#' @param content.type Content-Type of the body
#'
#' @return httr response
post <- function(path, query = NULL, body = NULL, content.type = "application/json") {
check_auth()
.post <- function(path, query = NULL, body = NULL, content.type = "application/json") {
.check_auth()
content <- jsonlite::toJSON(body)
if (! is.null(body) && length(body) == 0) {
# If body is provided, but has length of 0
......
......@@ -12,15 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#' Default page size
FETCH_PAGESIZE <- 1000
MAX_ALLOWED_PAGES <- 500
#' Max pages to retrieve
.MAX_ALLOWED_PAGES <- 500
#' Who am i?
#'
#' @export
me <- function() {
resp <- api_call("/me")
resp <- .api_call("/me")
message(jsonlite::toJSON(resp, pretty = TRUE))
invisible(resp)
}
......@@ -35,17 +34,21 @@ me <- function() {
#' @seealso \code{\link{mcpd_filter}}
#'
#' @examples
#' \dontrun{
#' # Retrieve accession data by country of origin
#' accessions <- fetch_accessions(mcpd_filter(ORIGCTY = c("DEU", "SVN")))
#'
#' }
#'
#' @return Paged data structure
fetch_accessions_page <- function(filters = list(), page = 0, size = FETCH_PAGESIZE, selector = NULL) {
resp <- post(path = "/acn/filter", query=list(p = page, s = size), body = filters)
.fetch_accessions_page <- function(filters = list(), page = 0, size = 1000, selector = NULL) {
start_time <- as.numeric(as.numeric(Sys.time())*1000, digits=15)
resp <- .post(path = "/acn/filter", query=list(p = page, l = size), body = filters)
if (httr::status_code(resp) != 200) {
stop("Genesys responded with HTTP status code ", httr::status_code(resp), ". Expected 200.")
}
end_time <- as.numeric(as.numeric(Sys.time())*1000, digits=15)
paged <- jsonlite::fromJSON(httr::content(resp, "text"), simplifyVector = FALSE)
message(paste("Retrieved page", page + 1, "of", paged$totalPages, "with", paged$numberOfElements, "rows"))
message(paste("Retrieved page", page + 1, "of", paged$totalPages, "with", paged$numberOfElements, "rows in", end_time - start_time, "ms."))
# Apply selector
if (is.function(selector)) {
......@@ -60,8 +63,10 @@ fetch_accessions_page <- function(filters = list(), page = 0, size = FETCH_PAGES
#' @param size number of records to load per page (page size)
#' @param page the page index (0-based)
#' @param selector NULL or a function to "select" variables of interest
#' @param at.least stop fetching when at.least records are received from Genesys
#'
#' @examples
#' \dontrun{
#' # Retrieve all accession data by country of origin
#' accessions <- fetch_accessions(mcpd_filter(ORIGCTY = c("DEU", "SVN")))
#'
......@@ -72,25 +77,27 @@ fetch_accessions_page <- function(filters = list(), page = 0, size = FETCH_PAGES
#' accessions <- fetch_accessions(mcpd_filter(ORIGCTY = c("DEU", "SVN")), selector = function(x) {
#' list(id = x$id, acceNumb = x$acceNumb, instCode = x$institute$code)
#' })
#'
#' }
#'
#' @export
#' @return Paged data structure
fetch_accessions <- function(filters = list(), page = NULL, size = FETCH_PAGESIZE, selector = NULL) {
fetch_accessions <- function(filters = list(), page = NULL, size = 1000, selector = NULL, at.least = NULL) {
if (! is.null(page)) {
# Fetch page
return(fetch_accessions_page(filters, page, size, selector));
return(.fetch_accessions_page(filters, page, size, selector));
}
# Fetch first page to determine number of records
paged <- fetch_accessions_page(filters, page = 0, size, selector)
paged <- .fetch_accessions_page(filters, page = 0, size, selector)
pages <- paged$totalPages
for (page in 1:pages) {
if (page > MAX_ALLOWED_PAGES) {
if (page > .MAX_ALLOWED_PAGES) {
# Break if over max pages
return(page)
message(paste("Not requesting data after page", .MAX_ALLOWED_PAGES, "Stopping."))
break
}
p <- fetch_accessions_page(filters, page, size, selector)
p <- .fetch_accessions_page(filters, page, size, selector)
paged$content <- c(paged$content, p$content)
paged$last <- p$last
......@@ -100,6 +107,10 @@ fetch_accessions <- function(filters = list(), page = NULL, size = FETCH_PAGESIZ
# print("Got last page")
break
}
if (! is.null(at.least) && at.least <= paged$numberOfElements) {
message(paste("Receved", paged$numberOfElements, "of", at.least, "requested. Stopping."))
break
}
}
paged
......
---
title: "genesysr Tutorial"
author: "Nora Castaneda & Matija Obreza"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{genesysr Tutorial}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
Querying Genesys PGR
=====================
[Genesys PGR](https://www.genesys-pgr.org) is the global database on plant genetic resources
maintained *ex situ* in national, regional and international genebanks around the world.
**genesysr** uses the [Genesys API](https://www.genesys-pgr.org/doc/0/apis) to query Genesys data.
Accessing data with **genesysr** is similar to downloading data in CSV or Excel format and loading
it into R.
## For the impatient
Accession passport data is retrieved with the `fetch_accessions` function.
The database is queried by providing a `filter` (see Filters below):
```
## Setup: use Genesys Sandbox environment
# genesysr::setup_sandbox()
# genesysr::setup_production() # This is initialized by default when loading genesysr
# Open a browser: login to Genesys and authorize access
genesysr::user_login()
# Retrieve all accession data for genus *Musa*
musa <- fetch_accessions(filters = list(taxonomy.genus = c('Musa')))
# Retrieve all accession data for the Musa International Transit Center, Bioversity International
itc <- fetch_accessions(list(institute.code = c('BEL084')))
# Retrieve all accession data for the Musa International Transit Center, Bioversity International (BEL084) and the International Center for Tropical Agriculture (COL003)
some <- fetch_accessions(list(institute.code = c('BEL084','COL003')))
```
**genesysr** provides utility functions to create `filter` objects using [Multi-Crop Passport Descriptors (MCPD)](https://www.genesys-pgr.org/doc/0/basics#mcpd) definitions:
```
# Retrieve data by country of origin (MCPD)
fetch_accessions(mcpd_filter(ORIGCTY = c("DEU", "SVN")))
```
# Processing fetched data
Fetched data is provided as a deeply nested list. To flatten the list, the following steps are suggested:
```r
require(tidyverse)
musa.flatten <- lapply(musa$content, unlist) # looks good
musa.flatten <- musa.flatten %>% map_df(bind_rows)
```
# Filters
The records returned by Genesys match all filters provided (*AND* operation), while individual filters
allow for specifying multiple criteria (*OR* operation):
```r
# (genus == Musa) AND ((origcty == NGA) OR (origcty == CIV))
filter <- list(taxonomy.genus = c('Musa'), orgCty.iso3 = c('NGA', 'CIV'))
```
There are a number of filtering options to retrieve current data from Genesys.
The `filter` object is a named `list()` where names match a Genesys filter and the value
specifies the criteria to match.
### Taxonomy
`taxonomy.genus` filters by a *list* of genera.
```r
filters <- list(taxonomy.genus = c('Hordeum', 'Musa'))
```
`taxonomy.species` filters by a *list* of species.
```r
filters <- list(taxonomy.genus = c('Hordeum'), taxonomy.species = c('vulgare'))
```
### Origin of material
`orgCty.iso3` filters by ISO3 code of country of origin of PGR material.
```r
# Material originating from Germany (DEU) and France (FRA)
filters <- list(orgCty.iso3 = c('DEU', 'FRA'))
```
`geo.latitude` and `geo.longitude` filters by latitude/longitude (in decimal format) of the
collecting site.
```r
# TBD
filters <- list(geo.latitude = genesysr::range(-10, 30), geo.longitude = genesysr::range(30, 50))
```
### Holding institute
`institute.code` filters by a *list* of FAO WIEWS institute codes of the holding institutes.
```r
# Filter for ITC (BEL084) and CIAT (COL003)
list(institute.code = c('BEL084', 'COL003'))
```
`institute.country.iso3` filters by a *list* of ISO3 country codes of country of the holding institute.
```r
# Filter for genebanks in Slovenia (SVN) and Belgium (BEL)
list(institute.country.iso3 = c('SVN', 'BEL'))
```
# Selecting columns
Genesys API returns a lot of variables for accession passport data.
To reduce the amount of data to be processed and kept in memory, select the columns of interest with a `selector` function:
```
# Keep only accession id, acceNumb and instCode for *Musa* data
fetch_accessions(list(taxonomy.genus = c('Musa')), selector = function(x) {
list(id = x$id, acceNumb = x$acceNumb, instCode = x$institute$code)
})
```
To list the variable names returned by the Genesys APIs, test the response and select columns of interest:
```r
filters <- list()
accn <- fetch_accessions(filters)
# Print names used in JSON response from Genesys
sort(unique(names(unlist(accn$content))))
```
# Step-by-step example
Let's take a look of all the process of fetching accession passport data from Genesys.
1. Load genesysr
```r
library(genesysr)
```
2. Setup using user credentials
```r
setup_sandbox()
user_login()
```
3. Fetch data
```r
musa <- genesysr::fetch_accessions(list(taxonomy.genus = c('Musa')))
```
4. Flatten data into data frame
```r
require(tidyverse)
musa.flatten <- lapply(musa$content, unlist) #looks good
musa.flatten <- musa.flatten %>% map_df(bind_rows)
```