ud_update: Removed the '.' appended at the end of the text, and added mc.preschedule = F for testing

master
Erik de Vries 6 years ago
parent 37df81b8ff
commit c7560d7e32

@ -42,14 +42,12 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword
out$`_source.preteaser`, out$`_source.preteaser`,
out$`_source.teaser`, out$`_source.teaser`,
out$`_source.text`, out$`_source.text`,
'.',
sep = ". ") %>% sep = ". ") %>%
# Remove html tags, and multiple consecutive whitespaces # Remove html tags, and multiple consecutive whitespaces
str_replace_all("<.{0,20}?>", " ") %>% str_replace_all("<.{0,20}?>", " ") %>%
str_replace_all('(\\. ){2,}', '. ') %>% str_replace_all('(\\. ){2,}', '. ') %>%
str_replace_all('([!?.])\\.','\\1') %>% str_replace_all('([!?.])\\.','\\1') %>%
str_replace_all("\\s+"," ") str_replace_all("\\s+"," ")
# out <- filter(out, nchar(merged) > 1)
par_proc <- function(row, out, udmodel) { par_proc <- function(row, out, udmodel) {
doc <- out[row,] doc <- out[row,]
ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>% ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "default", doc_id = doc$`_id`)) %>%
@ -67,7 +65,7 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword
) )
return(ud) return(ud)
} }
ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores)) ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores, mc.preschedule = F))
bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set', ver = ver) bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set', ver = ver)
res <- elastic_update(bulk, es_super = es_super, localhost = localhost) res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
return(res) return(res)

Loading…
Cancel
Save