actorizer: add pre_tags and post_tags to argument list

bulk_writer: update to use the _doc document type
query_gen_actors: add NA prefix and postfix columns for all searches that don't have pre- or postfixes
master
Erik de Vries 5 years ago
parent a1b6c6a7cb
commit 593d2de6e2

@ -13,7 +13,7 @@
#' @export #' @export
#' @examples #' @examples
#' actorizer(out, localhost = F, ids, prefix, postfix, identifier, es_super) #' actorizer(out, localhost = F, ids, prefix, postfix, identifier, es_super)
actorizer <- function(out, localhost = F, ids, prefix, postfix, identifier, es_super, ver) { actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_tags, es_super, ver) {
### Function to filter out false positives using regex ### Function to filter out false positives using regex
exceptionizer <- function(id, ud, doc, markers, pre_tags_regex, post_tags_regex,pre_tags,post_tags, prefix, postfix) { exceptionizer <- function(id, ud, doc, markers, pre_tags_regex, post_tags_regex,pre_tags,post_tags, prefix, postfix) {
min <- min(ud$start[ud$sentence_id == id]) # Get start position of sentence min <- min(ud$start[ud$sentence_id == id]) # Get start position of sentence

@ -27,19 +27,19 @@ bulk_writer <- function(x, index = 'maml', varname, type, ver) {
} }
if (varname == "ud") { if (varname == "ud") {
return( return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}}
{ "script" : { "source": "ctx._source.version = \\"',ver,'\\"; ctx._source.ud = params.code; ctx._source.remove(\\"tokens\\")", "lang" : "painless", "params": { "code": ',json,'}}}') { "script" : { "source": "ctx._source.version = \\"',ver,'\\"; ctx._source.ud = params.code; ctx._source.remove(\\"tokens\\")", "lang" : "painless", "params": { "code": ',json,'}}}')
) )
} }
if (type == 'set') { if (type == 'set') {
return( return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}}
{ "script" : { "source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,', "object": {"',varname,'": ',json,'} }}}') { "script" : { "source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,', "object": {"',varname,'": ',json,'} }}}')
) )
} }
if (type == "add") { if (type == "add") {
return( return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}} paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}}
{"script": {"source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null && ctx._source.computerCodes.containsKey(\\"',varname,'\\")) {ctx._source.computerCodes.',varname,'.addAll(params.code)} else if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,' , "object": {"',varname,'": ',json,'}}}}' {"script": {"source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null && ctx._source.computerCodes.containsKey(\\"',varname,'\\")) {ctx._source.computerCodes.',varname,'.addAll(params.code)} else if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,' , "object": {"',varname,'": ',json,'}}}}'
) )
) )

@ -137,8 +137,9 @@ query_gen_actors <- function(actor, country, pre_tags, post_tags) {
ids <- list(c(actor$`_source.actorId`,str_c(actor$`_source.partyId`,'_a'))) ids <- list(c(actor$`_source.actorId`,str_c(actor$`_source.partyId`,'_a')))
actorid <- actor$`_source.actorId` actorid <- actor$`_source.actorId`
query <- generator(country, actor$`_source.startDate`, actor$`_source.endDate`, query_string, pre_tags, post_tags, actorid) query <- generator(country, actor$`_source.startDate`, actor$`_source.endDate`, query_string, pre_tags, post_tags, actorid)
return(data.frame(query = query, ids = I(ids), stringsAsFactors = F)) return(data.frame(query = query, ids = I(ids), prefix = NA, postfix = NA, stringsAsFactors = F))
} }
### Query generation for party searches ### Query generation for party searches
if (actor$`_source.function` == "Party") { if (actor$`_source.function` == "Party") {
actor$`_source.startDate` <- "2000-01-01" actor$`_source.startDate` <- "2000-01-01"
@ -175,7 +176,7 @@ query_gen_actors <- function(actor, country, pre_tags, post_tags) {
if (country == 'uk') { if (country == 'uk') {
df2 <- data.frame(query = query, ids = I(ids), prefix = actor$`_source.notPrecededBy`, postfix = actor$`_source.notFollowedBy`, stringsAsFactors = F) df2 <- data.frame(query = query, ids = I(ids), prefix = actor$`_source.notPrecededBy`, postfix = actor$`_source.notFollowedBy`, stringsAsFactors = F)
} else { } else {
df2 <- data.frame(query = query, ids = I(ids), stringsAsFactors = F) df2 <- data.frame(query = query, ids = I(ids), prefix = NA, postfix = NA, stringsAsFactors = F)
} }
} }
if (exists('df1') == T & exists('df2') == T) { if (exists('df1') == T & exists('df2') == T) {

Loading…
Cancel
Save