From e5c87cf69d1cb2fa82a9e2db0346a87719c49faa Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Thu, 13 Dec 2018 12:03:23 +0100 Subject: [PATCH] actorizer: more debug prints --- R/actorizer.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/actorizer.R b/R/actorizer.R index 53f6738..a5be910 100644 --- a/R/actorizer.R +++ b/R/actorizer.R @@ -22,7 +22,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier } sentencizer <- function(row, out, udmodel, ids, prefix, postfix, identifier) { - print(row) + ### If no pre or postfixes, match *not nothing* i.e. anything if (is.na(prefix) || prefix == '') { prefix = '$^' @@ -32,6 +32,8 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier } ### Also needs fix for empty strings (non-NA) doc <- out[row,] + print(doc$merged) + print(row) ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "none", doc_id = doc$`_id`)) %>% filter(upos != "PUNCT") # Removing punctuation to get accurate word counts sentence_count <- length(unique(ud$sentence))