|
|
|
@ -22,7 +22,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sentencizer <- function(row, out, udmodel, ids, prefix, postfix, identifier) {
|
|
|
|
|
print(row)
|
|
|
|
|
|
|
|
|
|
### If there is no prefix or postfix, match *not nothing*, i.e. anything
|
|
|
|
|
if (is.na(prefix) || prefix == '') {
|
|
|
|
|
prefix = '$^'
|
|
|
|
@ -32,6 +32,8 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
|
|
|
|
|
}
|
|
|
|
|
### Also needs a fix for empty strings (non-NA)
|
|
|
|
|
doc <- out[row,]
|
|
|
|
|
print(doc$merged)
|
|
|
|
|
print(row)
|
|
|
|
|
ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "none", doc_id = doc$`_id`)) %>%
|
|
|
|
|
filter(upos != "PUNCT") # Removing punctuation to get accurate word counts
|
|
|
|
|
sentence_count <- length(unique(ud$sentence))
|
|
|
|
|