actorizer: add pre_tags and post_tags to argument list

bulk_writer: updated to use _doc doctype
query_gen_actors: added NA-valued prefix and postfix columns to the returned data frame for all searches that don't have pre- or postfixes
master
Erik de Vries 5 years ago
parent a1b6c6a7cb
commit 593d2de6e2

@ -13,7 +13,7 @@
#' @export
#' @examples
#' actorizer(out, localhost = F, ids, prefix, postfix, pre_tags, post_tags, es_super, ver)
actorizer <- function(out, localhost = F, ids, prefix, postfix, identifier, es_super, ver) {
actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_tags, es_super, ver) {
### Function to filter out false positives using regex
exceptionizer <- function(id, ud, doc, markers, pre_tags_regex, post_tags_regex,pre_tags,post_tags, prefix, postfix) {
min <- min(ud$start[ud$sentence_id == id]) # Get start position of sentence

@ -27,19 +27,19 @@ bulk_writer <- function(x, index = 'maml', varname, type, ver) {
}
if (varname == "ud") {
return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}}
paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}}
{ "script" : { "source": "ctx._source.version = \\"',ver,'\\"; ctx._source.ud = params.code; ctx._source.remove(\\"tokens\\")", "lang" : "painless", "params": { "code": ',json,'}}}')
)
}
if (type == 'set') {
return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}}
paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}}
{ "script" : { "source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,', "object": {"',varname,'": ',json,'} }}}')
)
}
if (type == "add") {
return(
paste0('{"update": {"_index": "',index,'", "_type": "doc", "_id": "',x[1],'"}}
paste0('{"update": {"_index": "',index,'", "_type": "_doc", "_id": "',x[1],'"}}
{"script": {"source": "ctx._source.version = \\"',ver,'\\"; if (ctx._source.computerCodes != null && ctx._source.computerCodes.containsKey(\\"',varname,'\\")) {ctx._source.computerCodes.',varname,'.addAll(params.code)} else if (ctx._source.computerCodes != null) {ctx._source.computerCodes.',varname,' = params.code} else {ctx._source.computerCodes = params.object}", "lang" : "painless", "params": { "code": ',json,' , "object": {"',varname,'": ',json,'}}}}'
)
)

@ -137,8 +137,9 @@ query_gen_actors <- function(actor, country, pre_tags, post_tags) {
ids <- list(c(actor$`_source.actorId`,str_c(actor$`_source.partyId`,'_a')))
actorid <- actor$`_source.actorId`
query <- generator(country, actor$`_source.startDate`, actor$`_source.endDate`, query_string, pre_tags, post_tags, actorid)
return(data.frame(query = query, ids = I(ids), stringsAsFactors = F))
return(data.frame(query = query, ids = I(ids), prefix = NA, postfix = NA, stringsAsFactors = F))
}
### Query generation for party searches
if (actor$`_source.function` == "Party") {
actor$`_source.startDate` <- "2000-01-01"
@ -175,7 +176,7 @@ query_gen_actors <- function(actor, country, pre_tags, post_tags) {
if (country == 'uk') {
df2 <- data.frame(query = query, ids = I(ids), prefix = actor$`_source.notPrecededBy`, postfix = actor$`_source.notFollowedBy`, stringsAsFactors = F)
} else {
df2 <- data.frame(query = query, ids = I(ids), stringsAsFactors = F)
df2 <- data.frame(query = query, ids = I(ids), prefix = NA, postfix = NA, stringsAsFactors = F)
}
}
if (exists('df1') == T & exists('df2') == T) {

Loading…
Cancel
Save