From 5665b6d6225ecbe36e69a96367277c8fa6599bdd Mon Sep 17 00:00:00 2001
From: Erik de Vries <erik@devries.pm>
Date: Tue, 5 Feb 2019 14:33:55 +0100
Subject: [PATCH] actorizer: more fixes to punctuation

---
 R/actorizer.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/actorizer.R b/R/actorizer.R
index 8baa021..d8f99fe 100644
--- a/R/actorizer.R
+++ b/R/actorizer.R
@@ -33,7 +33,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
 
     ### The exception below is only valid for the UK, where the original UDPipe output misses a dot at the end of the article, but the actor output does not
     ### (UK output is older than actor output, should be updated)
-    if (!(ud_org$lemma[length(ud_org$lemma)] %in% c('!','?','.'))) {
+    if (length(ud_org$sentence_id) == length(ud$sentence_id)-1) {
       ud <- ud[-length(ud$sentence_id),]
     }
     if (length(ud_org$sentence_id) == length(ud$sentence_id)) {
@@ -41,6 +41,8 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
     } else {
       err = T
       print(paste0('ud_org and ud_actor not the same length for id ', doc$`_id`))
+      print(length(ud_org$sentence_id))
+      print(length(ud$sentence_id))
     }
     sentence_count <- length(unique(ud$sentence_id))
     ud <- ud %>%