From f6006eb9ba686fed99907ab8365f18088f52d40f Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Thu, 25 Apr 2019 11:10:25 +0200 Subject: [PATCH] actorizer: simplified pre/postfix check, only for NA, replace empty strings by NA beforehand --- R/actorizer.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/actorizer.R b/R/actorizer.R index 6a09c9b..79d3f60 100644 --- a/R/actorizer.R +++ b/R/actorizer.R @@ -60,12 +60,12 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t actor_sentences <- unique(ud$sentence_id[ud$actor]) # Sentence ids of sentences mentioning actor # Conducting regex filtering on matches only when there is a prefix and/or postfix to apply - if (!is.na(prefix) || prefix != '' || !is.na(postfix) || postfix != '') { + if (!is.na(prefix) || !is.na(postfix)) { ### If no pre or postfixes, match *not nothing* i.e. anything - if (is.na(prefix) || prefix == '') { + if (is.na(prefix)) { prefix = '$^' } - if (is.na(postfix) || postfix == '') { + if (is.na(postfix)) { postfix = '$^' } sentence_ids <- unlist(lapply(actor_sentences, @@ -122,6 +122,8 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t offsetter <- function(x, pre_tags, post_tags) { return(x-((row(x)-1)*(nchar(pre_tags)+nchar(post_tags)))) } + prefix[prefix==''] <- NA + postfix[postfix==''] <- NA pre_tags_regex <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", pre_tags) post_tags_regex <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", post_tags) out$markers <- mclapply(str_locate_all(out$merged,coll(pre_tags)), offsetter, pre_tags = pre_tags, post_tags = post_tags, mc.cores = detectCores())