lemma_writer: new function to write raw lemmas (without punctuation) to a text file. It is structured like an elasticizer update function (despite not updating anything on the server)
parent
4407a99774
commit
386ac42aee
@ -0,0 +1,21 @@
|
||||
#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||||
#'
|
||||
#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||||
#' @param out The elasticizer-generated data frame
|
||||
#' @param file The file to write the output to (including path, when required)
|
||||
#' @param localhost Unused, but defaults to FALSE
|
||||
#' @return Nothing (invisible NULL); called for its side effect of appending the lemma_UPOS tokens to \code{file}
|
||||
#' @export
|
||||
#' @examples
|
||||
#' \dontrun{lemma_writer(out, file = 'lemmas.txt')}
|
||||
|
||||
|
||||
#################################################################################################
|
||||
#################################### Lemma text file generator #############################
|
||||
#################################################################################################
|
||||
|
||||
# Append the lemmas of one elasticizer batch to a plain-text file as
# space-separated `lemma_UPOS` tokens, leaving out punctuation tokens.
#
# Arguments:
#   out       Data frame whose `_source.ud` column is unnested to expose the
#             `lemma` and `upos` columns.
#   file      Path of the output file; it is appended to, never truncated.
#   localhost Unused by this function.
#
# Returns NULL invisibly; the function is called for its side effect.
lemma_writer <- function(out, file, localhost = F) {
  out <- unnest(out, `_source.ud`)
  lemmas <- unlist(out$lemma)
  upos <- unlist(out$upos)

  # Use a logical mask rather than `-which(...)`: when no token is tagged
  # PUNCT, `-which()` evaluates to `-integer(0)`, which selects nothing and
  # silently dropped every token of the batch. `%in%` never yields NA, so
  # tokens with a missing tag are kept, as they were before.
  keep <- !(upos %in% "PUNCT")
  tokens <- str_c(lemmas[keep], upos[keep], sep = "_")

  if (length(tokens) > 0) {
    # The empty final element makes cat() emit a trailing separator, so the
    # first token of the next appended batch does not fuse with the last
    # token of this one.
    cat(tokens, "", file = file, sep = " ", append = TRUE)
  }
  invisible(NULL)
}
|
@ -0,0 +1,24 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/lemma_writer.R
|
||||
\name{lemma_writer}
|
||||
\alias{lemma_writer}
|
||||
\title{Generates text output files (without punctuation) for external applications, such as GloVe embeddings}
|
||||
\usage{
|
||||
lemma_writer(out, file, localhost = F)
|
||||
}
|
||||
\arguments{
|
||||
\item{out}{The elasticizer-generated data frame}
|
||||
|
||||
\item{file}{The file to write the output to (including path, when required)}
|
||||
|
||||
\item{localhost}{Unused, but defaults to FALSE}
|
||||
}
|
||||
\value{
|
||||
Nothing (invisible NULL); called for its side effect of appending the lemma_UPOS tokens to \code{file}
|
||||
}
|
||||
\description{
|
||||
Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||||
}
|
||||
\examples{
|
||||
\dontrun{lemma_writer(out, file = 'lemmas.txt')}
|
||||
}
|
Loading…
Reference in new issue