From 386ac42aee78d3b73cd5b675cdf10f5a39ed0cd4 Mon Sep 17 00:00:00 2001
From: Erik de Vries
Date: Tue, 15 Jan 2019 11:36:51 +0100
Subject: [PATCH] lemma_writer: new function to write raw lemma's (without
 interpunction) to text file. Is structured as elasticizer update function
 (despite not updating anything on the server)

---
 R/lemma_writer.R    | 31 +++++++++++++++++++++++++++++++
 man/lemma_writer.Rd | 24 ++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 R/lemma_writer.R
 create mode 100644 man/lemma_writer.Rd

diff --git a/R/lemma_writer.R b/R/lemma_writer.R
new file mode 100644
index 0000000..d4daf45
--- /dev/null
+++ b/R/lemma_writer.R
@@ -0,0 +1,31 @@
+#' Generates text output files (without punctuation) for external applications, such as GloVe embeddings
+#'
+#' Appends the lemmas in \code{out} to \code{file} as space-separated \code{lemma_UPOS} tokens,
+#' skipping every token whose \code{upos} tag is \code{PUNCT}.
+#' @param out The elasticizer-generated data frame; must contain a nested \code{_source.ud} column holding \code{lemma} and \code{upos}
+#' @param file The file to append the output to (including path, when required)
+#' @param localhost Unused, but defaults to FALSE
+#' @return \code{NULL}, invisibly; the function is called for its side effect of appending to \code{file}
+#' @export
+#' @examples
+#' \dontrun{lemma_writer(out, file = 'lemmas.txt')}
+
+
+#################################################################################################
+#################################### Lemma text file generator #############################
+#################################################################################################
+
+lemma_writer <- function(out, file, localhost = F) {
+  # Expand the nested `_source.ud` column so that `lemma` and `upos` become columns of `out`
+  out <- unnest(out, `_source.ud`)
+  lemma <- unlist(out$lemma)
+  upos <- unlist(out$upos)
+  # Select with a logical mask rather than -which(): when no token is tagged PUNCT,
+  # which() returns integer(0) and x[-integer(0)] is empty, which silently dropped every lemma
+  keep <- !(upos %in% 'PUNCT')
+  tokens <- str_c(lemma[keep], upos[keep], sep = '_')
+  # The extra '' element makes cat() end the output with a separator, so the first token
+  # of the next appended batch is not glued to the last token of this one
+  cat(tokens, '', file = file, sep = ' ', append = TRUE)
+  invisible(NULL)
+}
diff --git a/man/lemma_writer.Rd b/man/lemma_writer.Rd
new file mode 100644
index 0000000..bc2a099
--- /dev/null
+++ b/man/lemma_writer.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lemma_writer.R
+\name{lemma_writer}
+\alias{lemma_writer}
+\title{Generates text output files (without punctuation) for external applications, such as GloVe embeddings}
+\usage{
+lemma_writer(out, file, localhost = F)
+}
+\arguments{
+\item{out}{The elasticizer-generated data frame}
+
+\item{file}{The file to write the output to (including path, when required)}
+
+\item{localhost}{Unused, but defaults to FALSE}
+}
+\value{
+\code{NULL}, invisibly; the function is called for its side effect of appending to \code{file}
+}
+\description{
+Appends the lemmas in \code{out} to \code{file} as \code{lemma_UPOS} tokens, skipping every token whose \code{upos} tag is \code{PUNCT}
+}
+\examples{
+\dontrun{lemma_writer(out, file = 'lemmas.txt')}
+}