lemma_writer: new function to write raw lemmas (without punctuation) to a text file. It is structured as an elasticizer update function (despite not updating anything on the server)
parent
4407a99774
commit
386ac42aee
@ -0,0 +1,21 @@
|
|||||||
|
#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||||||
|
#'
|
||||||
|
#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||||||
|
#' @param out The elasticizer-generated data frame
|
||||||
|
#' @param file The file to write the output to (including path, when required)
|
||||||
|
#' @param localhost Unused, but defaults to FALSE
|
||||||
|
#' @return \code{NULL}, invisibly. Called for its side effect of appending the tokens to \code{file}.
|
||||||
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' \dontrun{lemma_writer(out, file = "lemmas.txt")}
|
||||||
|
|
||||||
|
|
||||||
|
#################################################################################################
|
||||||
|
#################################### Lemma text file generator #############################
|
||||||
|
#################################################################################################
|
||||||
|
|
||||||
|
# Append the non-punctuation tokens of `out` to a plain-text file, each token
# written as `<lemma>_<upos>` and separated by single spaces.
#
# out       Data frame with a nested `_source.ud` column that unnests to (at
#           least) `lemma` and `upos` columns.
# file      File (or connection) to append to.
# localhost Unused; kept so the signature matches other elasticizer update
#           functions.
#
# Returns NULL invisibly; the function is called for its side effect.
lemma_writer <- function(out, file, localhost = FALSE) {
  # Expand the nested `_source.ud` column so that `lemma` and `upos` become
  # columns of `out`.
  out <- unnest(out, `_source.ud`)
  lemmas <- unlist(out$lemma)
  upos <- unlist(out$upos)

  # Select with a logical mask instead of `-which(...)`: when no token is
  # tagged PUNCT, `x[-integer(0)]` selects nothing and every lemma would be
  # silently dropped. `%in%` never yields NA, so tokens with a missing tag are
  # kept, as before.
  keep <- !(upos %in% "PUNCT")
  tokens <- str_c(lemmas[keep], upos[keep], sep = "_")

  if (length(tokens) > 0) {
    # The trailing empty string makes cat() emit a separator after the last
    # token, so the first token of the next appended batch does not fuse with
    # the last token of this one.
    cat(tokens, "", file = file, sep = " ", append = TRUE)
  }
  invisible(NULL)
}
|
@ -0,0 +1,24 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/lemma_writer.R
|
||||||
|
\name{lemma_writer}
|
||||||
|
\alias{lemma_writer}
|
||||||
|
\title{Generates text output files (without punctuation) for external applications, such as GloVe embeddings}
|
||||||
|
\usage{
|
||||||
|
lemma_writer(out, file, localhost = F)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{out}{The elasticizer-generated data frame}
|
||||||
|
|
||||||
|
\item{file}{The file to write the output to (including path, when required)}
|
||||||
|
|
||||||
|
\item{localhost}{Unused, but defaults to FALSE}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
\code{NULL}, invisibly. Called for its side effect of appending the tokens to \code{file}.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Generates text output files (without punctuation) for external applications, such as GloVe embeddings
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
\dontrun{lemma_writer(out, file = "lemmas.txt")}
|
||||||
|
}
|
Loading…
Reference in new issue