diff --git a/R/ud_update.R b/R/ud_update.R index a2e6bb0..99e5d70 100644 --- a/R/ud_update.R +++ b/R/ud_update.R @@ -42,14 +42,12 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword out$`_source.preteaser`, out$`_source.teaser`, out$`_source.text`, - '.', sep = ". ") %>% # Remove html tags, and multiple consequent whitespaces str_replace_all("<.{0,20}?>", " ") %>% str_replace_all('(\\. ){2,}', '. ') %>% str_replace_all('([!?.])\\.','\\1') %>% str_replace_all("\\s+"," ") - # out <- filter(out, nchar(merged) > 1) par_proc <- function(row, out, udmodel) { doc <- out[row,] ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>% @@ -67,7 +65,7 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword ) return(ud) } - ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores)) + ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores, mc.preschedule = F)) bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set', ver = ver) res <- elastic_update(bulk, es_super = es_super, localhost = localhost) return(res)