V 0.1 for elasticizer function with updater support

master
Erik de Vries 6 years ago
parent a273524105
commit 217ee76568

@ -8,7 +8,8 @@ Depends: R (>= 3.4.4),
jsonlite, jsonlite,
parallel, parallel,
tidyverse, tidyverse,
quanteda quanteda,
httr
License: Copyright Erik de Vries License: Copyright Erik de Vries
Encoding: UTF-8 Encoding: UTF-8
LazyData: true LazyData: true

@ -13,7 +13,7 @@
################################################################################################# #################################################################################################
#################################### Get data from ElasticSearch ################################ #################################### Get data from ElasticSearch ################################
################################################################################################# #################################################################################################
elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL){ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL, ...){
connect(es_port = 443, connect(es_port = 443,
es_transport = 'https', es_transport = 'https',
es_host = 'linux01.uis.no', es_host = 'linux01.uis.no',
@ -22,7 +22,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
es_pwd = es_pwd, es_pwd = es_pwd,
errors = 'complete') errors = 'complete')
# Get all results - one approach is to use a while loop # Get all results - one approach is to use a while loop
if (src == T || length(update) > 0 ) { if (src == T) {
res <- Search(index = index, time_scroll="5m",body = query, size = 1000, raw=T) res <- Search(index = index, time_scroll="5m",body = query, size = 1000, raw=T)
} }
if (src == F) { if (src == F) {
@ -30,7 +30,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
} }
json <- fromJSON(res) json <- fromJSON(res)
if (json$hits$total == 0) { if (json$hits$total == 0) {
return("No results found") return(json)
} else { } else {
out <- jsonlite:::flatten(json$hits$hits) out <- jsonlite:::flatten(json$hits$hits)
total <- json$hits$total total <- json$hits$total
@ -38,7 +38,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
batch <- 1 batch <- 1
print(paste0('Processing documents ',batch*1000-1000,' through ',batch*1000,' out of ',total,' documents.')) print(paste0('Processing documents ',batch*1000-1000,' through ',batch*1000,' out of ',total,' documents.'))
if (length(update) > 0){ if (length(update) > 0){
update() update(out, ...)
} }
while(hits != 0){ while(hits != 0){
res <- scroll(json$`_scroll_id`, time_scroll="5m", raw=T) res <- scroll(json$`_scroll_id`, time_scroll="5m", raw=T)
@ -48,12 +48,17 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
batch <- batch+1 batch <- batch+1
print(paste0('Processing documents ',batch*1000-1000,' through ',batch*1000,' out of ',total,' documents.')) print(paste0('Processing documents ',batch*1000-1000,' through ',batch*1000,' out of ',total,' documents.'))
if (length(update) > 0){ if (length(update) > 0){
update() out <- jsonlite:::flatten(json$hits$hits)
update(out, ...)
} else { } else {
out <- bind_rows(out, jsonlite:::flatten(json$hits$hits)) out <- bind_rows(out, jsonlite:::flatten(json$hits$hits))
} }
} }
} }
if (length(update) > 0) {
return("Done updating")
} else {
return(out) return(out)
} }
} }
}

@ -5,7 +5,8 @@
\title{Generate a data frame out of unparsed Elasticsearch JSON} \title{Generate a data frame out of unparsed Elasticsearch JSON}
\usage{ \usage{
elasticizer(query, src = T, index = "maml", elasticizer(query, src = T, index = "maml",
es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL) es_pwd = .rs.askForPassword("Elasticsearch READ"), update = NULL,
...)
} }
\arguments{ \arguments{
\item{query}{A JSON-formatted query in the Elasticsearch query DSL} \item{query}{A JSON-formatted query in the Elasticsearch query DSL}

Loading…
Cancel
Save