elasticizer: updated dump handling to create a dump for every batch, instead of one big file at the end
out_parser: streamlined code
query_gen_actors: only include relevant fields
ud_update: changed function parameters to work with elasticizer dump function
#' @param identifier String used to mark highlights. Should be a lowercase string
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @param es_super Password for write access to ElasticSearch
#' @param cores Number of cores to use for parallel processing, defaults to all available cores
#' @return As this is a nested function used within elasticizer, there is no return output
max<-max+((nchar(pre_tags)+nchar(post_tags))*match(split,markers))# Set end position to include markers (e.g if there are two markers of three characters in the sentence, the end position needs to be shifted by +6)
sentence<-paste0(' ',str_sub(doc$merged,min,max),' ')# Extract sentence from text, adding whitespaces before and after for double negation (i.e. Con only when preceded by "("))
# Check whether none of the regexes match; if so, return the sentence id, otherwise (if at least one of the regexes matches) return nothing
if (!str_detect(sentence,paste0(post_tags_regex,'(',postfix,')'))&&!str_detect(sentence,paste0('(',prefix,')',pre_tags_regex))){
markers<-doc$markers[[1]][,'start']# Extract list of markers
# Convert markers to udpipe rows (in some cases the start position doesn't align with the udpipe token start position (e.g. when anti-|||EU is treated as a single word))
#' Elasticizer update function: generate UDpipe output from base text
#' Generate UDpipe output from base text
#'
#' Elasticizer update function: generate UDpipe output from base text
#' @param out Does not need to be defined explicitly! (is already parsed in the elasticizer function)
#' @param udmodel UDpipe model to use
#' Generate UDpipe output from base text
#' @param file Filename of file to read in, also used for generating output file name
#' @param wd Working directory where *file*s can be found
#' @param ud_file Filename of udpipe model to use, should be in *wd*
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @param file Filename for output (ud_ is automatically prepended)
#' @return A vector of 1's indicating the success of each update call
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
\item{cores}{Number of cores to use for parallel processing, defaults to all available cores}
\item{identifier}{String used to mark highlights. Should be a lowercase string}
\title{Elasticizer update function: generate UDpipe output from base text}
\title{Generate UDpipe output from base text}
\usage{
ud_update(out, udmodel, ver)
ud_update(file, wd, ud_file, ver)
}
\arguments{
\item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)}
\item{file}{Filename of file to read in, also used for generating output file name}
\item{udmodel}{UDpipe model to use}
\item{wd}{Working directory where *file*s can be found}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
\item{ud_file}{Filename of udpipe model to use, should be in *wd*}
\item{file}{Filename for output (ud_ is automatically prepended)}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
}
\value{
A vector of 1's indicating the success of each update call
}
\description{
Elasticizer update function: generate UDpipe output from base text