Added a line that replaces each run of whitespace characters in the full text with a single regular space

master
Erik de Vries 6 years ago
parent 0e45c0f2d1
commit dc4daf9de4

@@ -32,7 +32,8 @@ dfm_gen <- function(out,words = '999', text = c("lemmas","full")) {
str_replace_na(out$`_source.text`, replacement = " "), str_replace_na(out$`_source.text`, replacement = " "),
sep = " ") %>% sep = " ") %>%
# Remove html tags # Remove html tags
str_replace_all("<.*?>", " ") str_replace_all("<.*?>", " ") %>%
str_replace_all("\\s+"," ")
} }
# out$codes <- out$`_source.codes.majorTopic` %>% # out$codes <- out$`_source.codes.majorTopic` %>%
out <- out %>% out <- out %>%

Loading…
Cancel
Save