From e110780ad506ebe0f9bcc6cb8d29d295257a5a44 Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Wed, 16 Jan 2019 19:21:20 +0100 Subject: [PATCH] merger: idiotic fix for a non-problem, see comment on line 32 --- R/merger.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/merger.R b/R/merger.R index 440c462..5c0c0c0 100644 --- a/R/merger.R +++ b/R/merger.R @@ -29,6 +29,10 @@ merger <- function(row, out, text, clean) { # Regex removes all words consisting of or containing numbers, @#$% # Punctuation is not taken into account, as it is already filtered out, see above {if(clean == T) str_replace_all(.,"\\S*?[0-9@#$%]+[^\\s]*", "") else . } + # In the very rare but obviously occurring (CxqrOmMB4Bzg6Uhtzw0P) case that a document consists only of punctuation, return an empty string + if (length(lemmas) == 0 ){ + lemmas <- '' + } return(lemmas) } # Replacing $-marked punctuation with their regular forms