actorizer, ud_update: implemented 'ver' variable for keeping track of updates

master
Erik de Vries 6 years ago
parent ae23456736
commit 8ffbddc073

@ -8,12 +8,13 @@
#' @param postfix Regex containing postfixes that should be excluded from hits
#' @param identifier String used to mark highlights. Should be a lowercase string
#' @param udmodel The udpipe model used for parsing every hit
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @param es_super Password for write access to ElasticSearch
#' @return As this is a nested function used within elasticizer, there is no return output
#' @export
#' @examples
#' actorizer(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super, ver)
actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super) {
actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super, ver) {
fncols <- function(data, cname) {
add <-cname[!cname%in%names(data)]
@ -82,7 +83,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
ids <- fromJSON(ids)
updates <- bind_rows(mclapply(seq(1,length(out[[1]]),1), sentencizer, out = out, ids = ids, postfix = postfix, prefix=prefix, identifier=identifier, udmodel = udmodel, mc.cores = detectCores()))
bulk <- apply(updates, 1, bulk_writer, varname ='actorsDetail', type = 'add')
bulk <- c(bulk,apply(updates[c(1,8)], 1, bulk_writer, varname='actors', type = 'add'))
bulk <- apply(updates, 1, bulk_writer, varname ='actorsDetail', type = 'add', ver = ver)
bulk <- c(bulk,apply(updates[c(1,8)], 1, bulk_writer, varname='actors', type = 'add', ver = ver))
return(elastic_update(bulk, es_super = es_super, localhost = localhost))
}

@ -6,6 +6,7 @@
#' @param udmodel UDpipe model to use
#' @param es_super Password for write access to ElasticSearch
#' @param cores Number of cores to use for parallel processing, defaults to detectCores() (all cores available)
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @return A vector of 1's indicating the success of each update call
#' @export
#' @examples
@ -18,7 +19,7 @@
# }
# }
ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword("ElasticSearch WRITE"), cores = detectCores()) {
ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword("ElasticSearch WRITE"), cores = detectCores(), ver) {
### Ensure correct punctuation by inserting a '. ' at the end of every text field, then removing any duplicate occurrences
out <- out %>%
mutate(`_source.title` = str_replace_na(`_source.title`, replacement = '')) %>%
@ -55,7 +56,7 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword
return(ud)
}
ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores))
bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set')
bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set', ver = ver)
res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
return(res)
}

@ -5,7 +5,7 @@
\title{Updater function for elasticizer: Conduct actor searches}
\usage{
actorizer(out, localhost = F, ids, type, prefix, postfix, identifier,
udmodel, es_super)
udmodel, es_super, ver)
}
\arguments{
\item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)}
@ -23,6 +23,8 @@ actorizer(out, localhost = F, ids, type, prefix, postfix, identifier,
\item{udmodel}{The udpipe model used for parsing every hit}
\item{es_super}{Password for write access to ElasticSearch}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
}
\value{
As this is a nested function used within elasticizer, there is no return output

@ -6,7 +6,7 @@
\usage{
ud_update(out, localhost = T, udmodel,
es_super = .rs.askForPassword("ElasticSearch WRITE"),
cores = detectCores())
cores = detectCores(), ver)
}
\arguments{
\item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)}
@ -18,6 +18,8 @@ ud_update(out, localhost = T, udmodel,
\item{es_super}{Password for write access to ElasticSearch}
\item{cores}{Number of cores to use for parallel processing, defaults to detectCores() (all cores available)}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
}
\value{
A vector of 1's indicating the success of each update call

Loading…
Cancel
Save