From 593d2de6e26eeae3c9b62acb5a6a68e37457d083 Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Wed, 24 Apr 2019 11:57:03 +0200 Subject: [PATCH] actorizer: add pre_tags and post_tags to argument list bulk_writer: updated to use _doc doctype query_gen_actors: added NA for all searches that don't have pre- or postfixes --- R/actorizer.R | 2 +- R/bulk_writer.R | 6 +++--- R/query_gen_actors.R | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/actorizer.R b/R/actorizer.R index 6cbd1de..5664531 100644 --- a/R/actorizer.R +++ b/R/actorizer.R @@ -13,7 +13,7 @@ #' @export #' @examples #' actorizer(out, localhost = F, ids, prefix, postfix, identifier, es_super) -actorizer <- function(out, localhost = F, ids, prefix, postfix, identifier, es_super, ver) { +actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_tags, es_super, ver) { ### Function to filter out false positives using regex exceptionizer <- function(id, ud, doc, markers, pre_tags_regex, post_tags_regex,pre_tags,post_tags, prefix, postfix) { min <- min(ud$start[ud$sentence_id == id]) # Get start position of sentence diff --git a/R/bulk_writer.R b/R/bulk_writer.R index 11f3a50..6dea232 100644 --- a/R/bulk_writer.R +++ b/R/bulk_writer.R @@ -27,19 +27,19 @@ bulk_writer <- function(x, index = 'maml', varname, type, ver) { } if (varname == "ud") { return( - paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} + paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}} { "script" : { "source": "ctx._source.version = \\"',ver,'\\"; ctx._source.ud = params.code; ctx._source.remove(\\"tokens\\")", "lang" : "painless", "params": { "code": ',json,'}}}') ) } if (type == 'set') { return( - paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} + paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}} { "script" : { "source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,', "object": {"',varname,'": ',json,'} }}}') ) } if (type == "add") { return( - paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} + paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}} {"script": {"source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null && ctx._source.computerCodes.containsKey(\\"',varname,'\\")) {ctx._source.computerCodes.',varname,'.addAll(params.code)} else if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,' , "object": {"',varname,'": ',json,'}}}}' ) ) diff --git a/R/query_gen_actors.R b/R/query_gen_actors.R index 31712ee..9205f94 100644 --- a/R/query_gen_actors.R +++ b/R/query_gen_actors.R @@ -137,8 +137,9 @@ query_gen_actors <- function(actor, country, pre_tags, post_tags) { ids <- list(c(actor$`_source.actorId`,str_c(actor$`_source.partyId`,'_a'))) actorid <- actor$`_source.actorId` query <- generator(country, actor$`_source.startDate`, actor$`_source.endDate`, query_string, pre_tags, post_tags, actorid) - return(data.frame(query = query, ids = I(ids), stringsAsFactors = F)) + return(data.frame(query = query, ids = I(ids), prefix = NA, postfix = NA, stringsAsFactors = F)) } + ### Query generation for party searches if (actor$`_source.function` == "Party") { actor$`_source.startDate` <- "2000-01-01" @@ -175,7 +176,7 @@ query_gen_actors <- function(actor, country, pre_tags, post_tags) { if (country == 'uk') { df2 <- data.frame(query = query, ids = I(ids), prefix = actor$`_source.notPrecededBy`, postfix = actor$`_source.notFollowedBy`, stringsAsFactors = F) } else { - df2 <- data.frame(query = query, ids = I(ids), stringsAsFactors = F) + df2 <- data.frame(query = query, ids = I(ids), prefix = NA, postfix = NA, stringsAsFactors = F) } } if (exists('df1') == T & exists('df2') == T) {