#' @param identifier String used to mark highlights. Should be a lowercase string
#' @param identifier String used to mark highlights. Should be a lowercase string
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @param es_super Password for write access to ElasticSearch
#' @param es_super Password for write access to ElasticSearch
#' @param cores Number of cores to use for parallel processing; defaults to all available cores
#' @return As this is a nested function used within elasticizer, there is no return output
#' @return As this is a nested function used within elasticizer, there is no return output
#' @param words String indicating the number of words to keep from each document (maximum document length), 999 indicates the whole document
#' @param words String indicating the number of words to keep from each document (maximum document length), 999 indicates the whole document
#' @param text String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), new-style "ud", or ud_upos combining lemmas with upos tags
#' @param text String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), new-style "ud", or ud_upos combining lemmas with upos tags
#' @param clean Boolean indicating whether the results should be cleaned by removing words matching regex (see code).
#' @param clean Boolean indicating whether the results should be cleaned by removing words matching regex (see code).
#' @param cores Number of cores to use for parallel processing; defaults to detectCores() (all available cores)
\item{postfix}{Regex containing postfixes that should be excluded from hits}
\item{postfix}{Regex containing postfixes that should be excluded from hits}
\item{identifier}{String used to mark highlights. Should be a lowercase string}
\item{es_super}{Password for write access to ElasticSearch}
\item{es_super}{Password for write access to ElasticSearch}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
\item{cores}{Number of cores to use for parallel processing; defaults to all available cores}
\item{identifier}{String used to mark highlights. Should be a lowercase string}
}
}
\value{
\value{
As this is a nested function used within elasticizer, there is no return output
As this is a nested function used within elasticizer, there is no return output
dfm_gen(out, words = "999", text = "lemmas", clean)
dfm_gen(out, words = "999", text = "lemmas", clean,
cores = detectCores())
}
}
\arguments{
\arguments{
\item{out}{The elasticizer-generated data frame}
\item{out}{The elasticizer-generated data frame}
@ -14,6 +15,8 @@ dfm_gen(out, words = "999", text = "lemmas", clean)
\item{text}{String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), new-style "ud", or ud_upos combining lemmas with upos tags}
\item{text}{String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), new-style "ud", or ud_upos combining lemmas with upos tags}
\item{clean}{Boolean indicating whether the results should be cleaned by removing words matching regex (see code).}
\item{clean}{Boolean indicating whether the results should be cleaned by removing words matching regex (see code).}
\item{cores}{Number of cores to use for parallel processing; defaults to detectCores() (all available cores)}