actorizer: fixed the exceptionizer by adding whitespace before and after the sentence. This is necessary because of the negated regex: matching anything before or after the highlight string that is NOT x requires that there actually is a character in front of or after it.

master
Erik de Vries 6 years ago
parent 593d2de6e2
commit 703b5e59a4

@ -21,7 +21,7 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t
split <- markers[markers %in% seq(min, max, 1)] # Get markers in sentence split <- markers[markers %in% seq(min, max, 1)] # Get markers in sentence
min <- min+((nchar(pre_tags)+nchar(post_tags))*((match(split,markers))-1)) min <- min+((nchar(pre_tags)+nchar(post_tags))*((match(split,markers))-1))
max <- max+((nchar(pre_tags)+nchar(post_tags))*match(split,markers)) # Set end position to include markers (e.g if there are two markers of three characters in the sentence, the end position needs to be shifted by +6) max <- max+((nchar(pre_tags)+nchar(post_tags))*match(split,markers)) # Set end position to include markers (e.g if there are two markers of three characters in the sentence, the end position needs to be shifted by +6)
sentence <- str_sub(doc$merged, min, max) # Extract sentence from text sentence <- paste0(' ',str_sub(doc$merged, min, max),' ') # Extract sentence from text, adding whitespace before and after for double negation (i.e. Con only when preceded by "(")
# Check if none of the regexes match, if so, return sentence id, otherwise (if one of the regexes match) return nothing # Check if none of the regexes match, if so, return sentence id, otherwise (if one of the regexes match) return nothing
if (!str_detect(sentence, paste0(post_tags_regex,'(',postfix,')')) && !str_detect(sentence, paste0('(',prefix,')',pre_tags_regex))) { if (!str_detect(sentence, paste0(post_tags_regex,'(',postfix,')')) && !str_detect(sentence, paste0('(',prefix,')',pre_tags_regex))) {

Loading…
Cancel
Save