changed udpipe output variable from tokens to ud

master
Erik de Vries 6 years ago
parent 061da17c2a
commit 34a6adf64e

@ -4,7 +4,7 @@
#' Type can be one of three values:
#' set: set the value of [varname] to x
#' add: add x to the values of [varname]
#' varname: When using tokens, the token field will be updated instead of a computerCodes field
#' varname: When varname is "ud", the top-level ud field is updated instead of a computerCodes field
#' @param x A single-row data frame, or a string containing the variables and/or values that should be updated (a data frame is converted to a JSON object, strings are stored as-is)
#' @param index The name of the Elasticsearch index to update
#' @param varname String indicating the parent variable that should be updated (when it does not exist, it will be created; all varnames are prefixed by computerCodes)
@ -24,10 +24,10 @@ bulk_writer <- function(x, index = 'maml', varname = 'updated_variable', type) {
names(x) <- NULL
json <- toJSON(x[-1], collapse = T)
}
if (varname == "tokens") {
if (varname == "ud") {
return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}}
{ "script" : { "source": "ctx._source.tokens = params.code", "lang" : "painless", "params": { "code": ',json,'}}}')
{ "script" : { "source": "ctx._source.ud = params.code", "lang" : "painless", "params": { "code": ',json,'}}}')
)
}
if (type == 'set') {

@ -25,20 +25,20 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword
ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>%
group_by(doc_id) %>%
summarise(
paragraph_id = list(list(paragraph_id)),
sentence_id = list(list(sentence_id)),
token_id = list(list(as.numeric(token_id))),
lemma = list(list(lemma)),
upos = list(list(upos)),
feats = list(list(feats)),
head_token_id = list(list(as.numeric(head_token_id))),
dep_rel = list(list(dep_rel)),
paragraph_id = list(list(as.integer(paragraph_id))),
sentence_id = list(list(as.integer(sentence_id))),
token_id = list(list(as.integer(token_id))),
lemma = list(list(as.character(lemma))),
upos = list(list(as.character(upos))),
feats = list(list(as.character(feats))),
head_token_id = list(list(as.integer(head_token_id))),
dep_rel = list(list(as.character(dep_rel))),
exists = list(list(TRUE))
)
return(ud)
}
ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores))
bulk <- apply(ud, 1, bulk_writer, varname = 'tokens', type = 'set')
bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set')
res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
return(res)
}

Loading…
Cancel
Save