You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
988 B
30 lines
988 B
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/dfm_gen.R
|
|
\name{dfm_gen}
|
|
\alias{dfm_gen}
|
|
\title{Generates dfm from ElasticSearch output}
|
|
\usage{
|
|
dfm_gen(out, words = "999", text = "lemmas", clean,
|
|
cores = detectCores())
|
|
}
|
|
\arguments{
|
|
\item{out}{The elasticizer-generated data frame}
|
|
|
|
\item{words}{String indicating the number of words to keep from each document (maximum document length); "999" indicates the whole document}
|
|
|
|
\item{text}{String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), new-style "ud", or "ud_upos" (lemmas combined with upos tags)}
|
|
|
|
\item{clean}{Boolean indicating whether the results should be cleaned by removing words that match a regex (see code).}
|
|
|
|
\item{cores}{Number of cores to use for parallel processing; defaults to \code{detectCores()} (all available cores)}
|
|
}
|
|
\value{
|
|
A quanteda dfm (document-feature matrix)
|
|
}
|
|
\description{
|
|
Generates dfm from ElasticSearch output
|
|
}
|
|
\examples{
|
|
dfm_gen(out, words = '999')
|
|
}
|