From 34a6adf64e2f23da6887448758b82cf068d1a4bd Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Wed, 5 Dec 2018 16:51:59 +0100 Subject: [PATCH] changed udpipe output variable from tokens to ud --- R/bulk_writer.R | 6 +++--- R/ud_update.R | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/bulk_writer.R b/R/bulk_writer.R index 126206e..90c8a9f 100644 --- a/R/bulk_writer.R +++ b/R/bulk_writer.R @@ -4,7 +4,7 @@ #' Type can be either one of three values: #' set: set the value of [varname] to x #' add: add x to the values of [varname] -#' varname: When using tokens, the token field will be updated instead of a computerCodes field +#' varname: When using ud, the ud field will be updated instead of a computerCodes field #' @param x A single-row data frame, or a string containing the variables and/or values that should be updated (a data frame is converted to a JSON object, strings are stored as-is) #' @param index The name of the Elasticsearch index to update #' @param varname String indicating the parent variable that should be updated (when it does not exist, it will be created, all varnames are prefixed by computerCodes) @@ -24,10 +24,10 @@ bulk_writer <- function(x, index = 'maml', varname = 'updated_variable', type) { names(x) <- NULL json <- toJSON(x[-1], collapse = T) } - if (varname == "tokens") { + if (varname == "ud") { return( paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} -{ "script" : { "source": "ctx._source.tokens = params.code", "lang" : "painless", "params": { "code": ',json,'}}}') +{ "script" : { "source": "ctx._source.ud = params.code", "lang" : "painless", "params": { "code": ',json,'}}}') ) } if (type == 'set') { diff --git a/R/ud_update.R b/R/ud_update.R index 364595a..97ed87b 100644 --- a/R/ud_update.R +++ b/R/ud_update.R @@ -25,20 +25,20 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>% group_by(doc_id) %>% summarise( - paragraph_id = list(list(paragraph_id)), - sentence_id = list(list(sentence_id)), - token_id = list(list(as.numeric(token_id))), - lemma = list(list(lemma)), - upos = list(list(upos)), - feats = list(list(feats)), - head_token_id = list(list(as.numeric(head_token_id))), - dep_rel = list(list(dep_rel)), + paragraph_id = list(list(as.integer(paragraph_id))), + sentence_id = list(list(as.integer(sentence_id))), + token_id = list(list(as.integer(token_id))), + lemma = list(list(as.character(lemma))), + upos = list(list(as.character(upos))), + feats = list(list(as.character(feats))), + head_token_id = list(list(as.integer(head_token_id))), + dep_rel = list(list(as.character(dep_rel))), exists = list(list(TRUE)) ) return(ud) } ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores)) - bulk <- apply(ud, 1, bulk_writer, varname = 'tokens', type = 'set') + bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set') res <- elastic_update(bulk, es_super = es_super, localhost = localhost) return(res) }