From 34a6adf64e2f23da6887448758b82cf068d1a4bd Mon Sep 17 00:00:00 2001
From: Erik de Vries <erik@devries.pm>
Date: Wed, 5 Dec 2018 16:51:59 +0100
Subject: [PATCH] Rename udpipe output field from tokens to ud and coerce udpipe columns to explicit integer/character types

---
 R/bulk_writer.R |  6 +++---
 R/ud_update.R   | 18 +++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/R/bulk_writer.R b/R/bulk_writer.R
index 126206e..90c8a9f 100644
--- a/R/bulk_writer.R
+++ b/R/bulk_writer.R
@@ -4,7 +4,7 @@
 #' Type can be either one of three values:
 #' set: set the value of [varname] to x
 #' add: add x to the values of [varname]
-#' varname: When using tokens, the token field will be updated instead of a computerCodes field
+#' varname: When varname is "ud", the document's top-level ud field is set to x instead of updating a computerCodes field
 #' @param x A single-row data frame, or a string containing the variables and/or values that should be updated (a data frame is converted to a JSON object, strings are stored as-is)
 #' @param index The name of the Elasticsearch index to update
 #' @param varname String indicating the parent variable that should be updated (when it does not exist, it will be created, all varnames are prefixed by computerCodes)
@@ -24,10 +24,10 @@ bulk_writer <- function(x, index = 'maml', varname = 'updated_variable', type) {
     names(x) <- NULL
     json <- toJSON(x[-1], collapse = T)
   }
-  if (varname == "tokens") {
+  if (varname == "ud") {
     return(
       paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}}
-{ "script" : { "source": "ctx._source.tokens = params.code", "lang" : "painless", "params": { "code": ',json,'}}}')
+{ "script" : { "source": "ctx._source.ud = params.code", "lang" : "painless", "params": { "code": ',json,'}}}')
     )
   }
   if (type == 'set') {
diff --git a/R/ud_update.R b/R/ud_update.R
index 364595a..97ed87b 100644
--- a/R/ud_update.R
+++ b/R/ud_update.R
@@ -25,20 +25,20 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword
     ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>%
       group_by(doc_id) %>%
       summarise(
-        paragraph_id = list(list(paragraph_id)),
-        sentence_id = list(list(sentence_id)),
-        token_id = list(list(as.numeric(token_id))),
-        lemma = list(list(lemma)),
-        upos = list(list(upos)),
-        feats = list(list(feats)),
-        head_token_id = list(list(as.numeric(head_token_id))),
-        dep_rel = list(list(dep_rel)),
+        paragraph_id = list(list(as.integer(paragraph_id))),
+        sentence_id = list(list(as.integer(sentence_id))),
+        token_id = list(list(as.integer(token_id))),
+        lemma = list(list(as.character(lemma))),
+        upos = list(list(as.character(upos))),
+        feats = list(list(as.character(feats))),
+        head_token_id = list(list(as.integer(head_token_id))),
+        dep_rel = list(list(as.character(dep_rel))),
         exists = list(list(TRUE))
      )
     return(ud)
   }
   ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores))
-  bulk <- apply(ud, 1, bulk_writer, varname = 'tokens', type = 'set')
+  bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set')
   res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
   return(res)
 }