From 217ee76568b4e367887ee796a7981076953058ee Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Tue, 23 Oct 2018 14:28:37 +0200 Subject: [PATCH] V 0.1 for elasticizer function with updater support --- DESCRIPTION | 3 ++- R/elasticizer.R | 17 +++++++++++------ man/elasticizer.Rd | 3 ++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b7ea14f..de9e044 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,8 @@ Depends: R (>= 3.4.4), jsonlite, parallel, tidyverse, - quanteda + quanteda, + httr License: Copyright Erik de Vries Encoding: UTF-8 LazyData: true diff --git a/R/elasticizer.R b/R/elasticizer.R index 9bc14c3..68fcda0 100644 --- a/R/elasticizer.R +++ b/R/elasticizer.R @@ -13,7 +13,7 @@ ################################################################################################# #################################### Get data from ElasticSearch ################################ ################################################################################################# -elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL){ +elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL, ...){ connect(es_port = 443, es_transport = 'https', es_host = 'linux01.uis.no', @@ -22,7 +22,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw es_pwd = es_pwd, errors = 'complete') # Get all results - one approach is to use a while loop - if (src == T || length(update) > 0 ) { + if (src == T) { res <- Search(index = index, time_scroll="5m",body = query, size = 1000, raw=T) } if (src == F) { @@ -30,7 +30,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw } json <- fromJSON(res) if (json$hits$total == 0) { - return("No results found") + return(json) } else { out <- jsonlite:::flatten(json$hits$hits) total <- json$hits$total @@ -38,7 +38,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw batch <- 1 print(paste0('Processing documents ',batch*1000-1000,' through ',batch*1000,' out of ',total,' documents.')) if (length(update) > 0){ - update() + update(out, ...) } while(hits != 0){ res <- scroll(json$`_scroll_id`, time_scroll="5m", raw=T) @@ -48,12 +48,17 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw batch <- batch+1 print(paste0('Processing documents ',batch*1000-1000,' through ',batch*1000,' out of ',total,' documents.')) if (length(update) > 0){ - update() + out <- jsonlite:::flatten(json$hits$hits) + update(out, ...) } else { out <- bind_rows(out, jsonlite:::flatten(json$hits$hits)) } } } - return(out) + if (length(update) > 0) { + return("Done updating") + } else { + return(out) + } } } diff --git a/man/elasticizer.Rd b/man/elasticizer.Rd index 58ed520..c69975f 100644 --- a/man/elasticizer.Rd +++ b/man/elasticizer.Rd @@ -5,7 +5,8 @@ \title{Generate a data frame out of unparsed Elasticsearch JSON} \usage{ elasticizer(query, src = T, index = "maml", - es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL) + es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL, + ...) } \arguments{ \item{query}{A JSON-formatted query in the Elasticsearch query DSL}