From 37df81b8ff8900b334aef9ed1dbe167fc25c4360 Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Sun, 30 Dec 2018 20:20:38 +0100 Subject: [PATCH] ud_update: fixed merged output field to always contain an (extra) dot (period) at the end of the document --- R/ud_update.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/ud_update.R b/R/ud_update.R index 602101b..a2e6bb0 100644 --- a/R/ud_update.R +++ b/R/ud_update.R @@ -42,12 +42,14 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword out$`_source.preteaser`, out$`_source.teaser`, out$`_source.text`, + '.', sep = ". ") %>% # Remove html tags, and multiple consequent whitespaces str_replace_all("<.{0,20}?>", " ") %>% str_replace_all('(\\. ){2,}', '. ') %>% str_replace_all('([!?.])\\.','\\1') %>% str_replace_all("\\s+"," ") + # out <- filter(out, nchar(merged) > 1) par_proc <- function(row, out, udmodel) { doc <- out[row,] ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>%