You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
mamlr/man/dfm_gen.Rd

41 lines
1.2 KiB

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dfm_gen.R
\name{dfm_gen}
\alias{dfm_gen}
\title{Generates a dfm from Elasticsearch output}
\usage{
dfm_gen(
out,
words = "999",
text = "lemmas",
clean,
tolower = T,
binary = F,
ngrams = 1
)
}
\arguments{
\item{out}{The elasticizer-generated data frame}
\item{words}{String indicating the number of words to keep from each document (maximum document length); "999" indicates the whole document. Defaults to "999"}
\item{text}{String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), new-style "ud", or "ud_upos" (combining lemmas with UPOS tags). Defaults to "lemmas"}
\item{clean}{Boolean indicating whether the results should be cleaned by removing words that match a regex (see code).}
\item{tolower}{Boolean indicating whether dfm features should be lowercased. Defaults to TRUE}
\item{binary}{Boolean indicating whether or not to generate a binary dfm (only indicating term presence, not count). Defaults to FALSE}
\item{ngrams}{Numeric; if higher than 1, generates ngrams of the given size. Defaults to 1}
}
\value{
A quanteda dfm (document-feature matrix)
}
\description{
Generates a quanteda dfm from the elasticizer-generated data frame of Elasticsearch output
}
\examples{
dfm_gen(out, words = '999')
}