# mamlr/R/lemma_writer.R

#' Write lemma_UPOS tokens (without punctuation) to a text file
#'
#' Generates text output (without punctuation) for external applications, such
#' as GloVe embeddings. The `_source.ud` column of `out` is unnested, every
#' token whose UPOS tag is `PUNCT` is dropped, each remaining lemma is joined
#' to its UPOS tag as `lemma_UPOS`, and the resulting tokens are appended to
#' `file`, separated by single spaces.
#' @param out The elasticizer-generated data frame. It must contain a
#'   `_source.ud` column that unnests into `lemma` and `upos` columns
#' @param file The file to write the output to (including path, when
#'   required). Output is appended to the file, never overwritten
#' @param localhost Unused, but defaults to FALSE
#' @return `NULL`, invisibly (the value of `cat()`). The function is called
#'   for its side effect of appending to `file`
#' @export
#' @examples
#' \dontrun{
#' lemma_writer(out, file = 'lemmas.txt')
#' }
#################################################################################################
#################################### Lemma text file generator #############################
#################################################################################################
lemma_writer <- function(out, file, localhost = FALSE) {
  out <- unnest(out, `_source.ud`)
  lemmas <- unlist(out$lemma)
  upos <- unlist(out$upos)
  # Use a logical mask rather than `x[-which(...)]`: when no token is tagged
  # PUNCT, `which()` returns integer(0) and `x[-integer(0)]` selects nothing,
  # which would silently discard every token. `%in%` never returns NA, so
  # tokens with a missing UPOS tag are kept, as before.
  keep <- !(upos %in% 'PUNCT')
  tokens <- str_c(lemmas[keep], upos[keep], sep = '_')
  # NOTE(review): cat() writes no trailing separator, so with append = TRUE the
  # last token of one call and the first token of the next call end up
  # adjacent in the file with nothing between them. Confirm whether callers
  # rely on this before adding a delimiter.
  cat(tokens, file = file, append = TRUE)
}