You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
22 lines
1.0 KiB
22 lines
1.0 KiB
6 years ago
|
#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||
|
#'
|
||
|
#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||
|
#' @param out The elasticizer-generated data frame
|
||
|
#' @param file The file to write the output to (including path, when required)
|
||
|
#' @param localhost Unused, but defaults to FALSE
|
||
|
#' @return Invisible `NULL` (the value of `cat()`); called for its side effect of appending the tokens to `file`
|
||
|
#' @export
|
||
|
#' @examples
|
||
|
#' \dontrun{lemma_writer(out, file = 'lemmas.txt')}
|
||
|
|
||
|
|
||
|
#################################################################################################
|
||
|
#################################### Lemma text file generator #############################
|
||
|
#################################################################################################
|
||
|
|
||
|
lemma_writer <- function(out, file, localhost = FALSE) {
  # Append the non-punctuation tokens of `out` to `file` as space-separated
  # `lemma_UPOS` strings (plain text for external tools such as GloVe).
  #
  # Args:
  #   out:       Data frame with a `_source.ud` column that unnests into
  #              `lemma` and `upos` columns.
  #   file:      File (or connection) handed to cat(); output is appended.
  #   localhost: Unused; kept for interface compatibility.
  #
  # Returns:
  #   The value of cat() (invisible NULL); called for its side effect.
  out <- unnest(out, `_source.ud`)
  lemmas <- unlist(out$lemma)
  upos <- unlist(out$upos)

  # Use a logical keep-mask instead of `x[-which(cond)]`: when no token is
  # tagged PUNCT, which() returns integer(0) and `x[-integer(0)]` selects
  # nothing, so the whole batch would silently be dropped. `%in%` never
  # yields NA, so tokens with a missing upos are kept, as which() did.
  keep <- !(upos %in% "PUNCT")
  tokens <- str_c(lemmas[keep], upos[keep], sep = "_")

  # NOTE(review): cat() writes no trailing separator, so the last token of
  # one call and the first token of the next appending call end up adjacent
  # in the file -- confirm whether callers rely on this.
  cat(tokens, file = file, append = TRUE)
}
|