actorizer: simplify the prefix/postfix check to test only for NA; empty strings are now replaced by NA beforehand

master
Erik de Vries 5 years ago
parent 298099a4e6
commit f6006eb9ba

@ -60,12 +60,12 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t
actor_sentences <- unique(ud$sentence_id[ud$actor]) # Sentence ids of sentences mentioning actor actor_sentences <- unique(ud$sentence_id[ud$actor]) # Sentence ids of sentences mentioning actor
# Conducting regex filtering on matches only when there is a prefix and/or postfix to apply # Conducting regex filtering on matches only when there is a prefix and/or postfix to apply
if (!is.na(prefix) || prefix != '' || !is.na(postfix) || postfix != '') { if (!is.na(prefix) || !is.na(postfix)) {
### If no pre or postfixes, match *not nothing* i.e. anything ### If no pre or postfixes, match *not nothing* i.e. anything
if (is.na(prefix) || prefix == '') { if (is.na(prefix)) {
prefix = '$^' prefix = '$^'
} }
if (is.na(postfix) || postfix == '') { if (is.na(postfix)) {
postfix = '$^' postfix = '$^'
} }
sentence_ids <- unlist(lapply(actor_sentences, sentence_ids <- unlist(lapply(actor_sentences,
@ -122,6 +122,8 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t
offsetter <- function(x, pre_tags, post_tags) { offsetter <- function(x, pre_tags, post_tags) {
return(x-((row(x)-1)*(nchar(pre_tags)+nchar(post_tags)))) return(x-((row(x)-1)*(nchar(pre_tags)+nchar(post_tags))))
} }
prefix[prefix==''] <- NA
postfix[postfix==''] <- NA
pre_tags_regex <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", pre_tags) pre_tags_regex <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", pre_tags)
post_tags_regex <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", post_tags) post_tags_regex <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", post_tags)
out$markers <- mclapply(str_locate_all(out$merged,coll(pre_tags)), offsetter, pre_tags = pre_tags, post_tags = post_tags, mc.cores = detectCores()) out$markers <- mclapply(str_locate_all(out$merged,coll(pre_tags)), offsetter, pre_tags = pre_tags, post_tags = post_tags, mc.cores = detectCores())

Loading…
Cancel
Save