actorizer, ud_update: implemented 'ver' variable for keeping track of updates

master
Erik de Vries 6 years ago
parent ae23456736
commit 8ffbddc073

@ -8,12 +8,13 @@
#' @param postfix Regex containing postfixes that should be excluded from hits #' @param postfix Regex containing postfixes that should be excluded from hits
#' @param identifier String used to mark highlights. Should be a lowercase string #' @param identifier String used to mark highlights. Should be a lowercase string
#' @param udmodel The udpipe model used for parsing every hit #' @param udmodel The udpipe model used for parsing every hit
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @param es_super Password for write access to ElasticSearch #' @param es_super Password for write access to ElasticSearch
#' @return As this is a nested function used within elasticizer, there is no return output #' @return As this is a nested function used within elasticizer, there is no return output
#' @export #' @export
#' @examples #' @examples
#' actorizer(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super) #' actorizer(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super)
actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super) { actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier, udmodel, es_super, ver) {
fncols <- function(data, cname) { fncols <- function(data, cname) {
add <-cname[!cname%in%names(data)] add <-cname[!cname%in%names(data)]
@ -82,7 +83,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
ids <- fromJSON(ids) ids <- fromJSON(ids)
updates <- bind_rows(mclapply(seq(1,length(out[[1]]),1), sentencizer, out = out, ids = ids, postfix = postfix, prefix=prefix, identifier=identifier, udmodel = udmodel, mc.cores = detectCores())) updates <- bind_rows(mclapply(seq(1,length(out[[1]]),1), sentencizer, out = out, ids = ids, postfix = postfix, prefix=prefix, identifier=identifier, udmodel = udmodel, mc.cores = detectCores()))
bulk <- apply(updates, 1, bulk_writer, varname ='actorsDetail', type = 'add') bulk <- apply(updates, 1, bulk_writer, varname ='actorsDetail', type = 'add', ver = ver)
bulk <- c(bulk,apply(updates[c(1,8)], 1, bulk_writer, varname='actors', type = 'add')) bulk <- c(bulk,apply(updates[c(1,8)], 1, bulk_writer, varname='actors', type = 'add', ver = ver))
return(elastic_update(bulk, es_super = es_super, localhost = localhost)) return(elastic_update(bulk, es_super = es_super, localhost = localhost))
} }

@ -6,6 +6,7 @@
#' @param udmodel UDpipe model to use #' @param udmodel UDpipe model to use
#' @param es_super Password for write access to ElasticSearch #' @param es_super Password for write access to ElasticSearch
#' @param cores Number of cores to use for parallel processing, defaults to detectCores() (all cores available) #' @param cores Number of cores to use for parallel processing, defaults to detectCores() (all cores available)
#' @param ver Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')
#' @return A vector of 1's indicating the success of each update call #' @return A vector of 1's indicating the success of each update call
#' @export #' @export
#' @examples #' @examples
@ -18,7 +19,7 @@
# } # }
# } # }
ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword("ElasticSearch WRITE"), cores = detectCores()) { ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword("ElasticSearch WRITE"), cores = detectCores(), ver) {
### Use correct interpunction, by inserting a '. ' at the end of every text field, then removing any duplicate occurrences ### Use correct interpunction, by inserting a '. ' at the end of every text field, then removing any duplicate occurrences
out <- out %>% out <- out %>%
mutate(`_source.title` = str_replace_na(`_source.title`, replacement = '')) %>% mutate(`_source.title` = str_replace_na(`_source.title`, replacement = '')) %>%
@ -55,7 +56,7 @@ ud_update <- function(out, localhost = T, udmodel, es_super = .rs.askForPassword
return(ud) return(ud)
} }
ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores)) ud <- bind_rows(mclapply(seq(1,length(out[[1]]),1), par_proc, out = out, udmodel=udmodel, mc.cores = cores))
bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set') bulk <- apply(ud, 1, bulk_writer, varname = 'ud', type = 'set', ver = ver)
res <- elastic_update(bulk, es_super = es_super, localhost = localhost) res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
return(res) return(res)
} }

@ -5,7 +5,7 @@
\title{Updater function for elasticizer: Conduct actor searches} \title{Updater function for elasticizer: Conduct actor searches}
\usage{ \usage{
actorizer(out, localhost = F, ids, type, prefix, postfix, identifier, actorizer(out, localhost = F, ids, type, prefix, postfix, identifier,
udmodel, es_super) udmodel, es_super, ver)
} }
\arguments{ \arguments{
\item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)} \item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)}
@ -23,6 +23,8 @@ actorizer(out, localhost = F, ids, type, prefix, postfix, identifier,
\item{udmodel}{The udpipe model used for parsing every hit} \item{udmodel}{The udpipe model used for parsing every hit}
\item{es_super}{Password for write access to ElasticSearch} \item{es_super}{Password for write access to ElasticSearch}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
} }
\value{ \value{
As this is a nested function used within elasticizer, there is no return output As this is a nested function used within elasticizer, there is no return output

@ -6,7 +6,7 @@
\usage{ \usage{
ud_update(out, localhost = T, udmodel, ud_update(out, localhost = T, udmodel,
es_super = .rs.askForPassword("ElasticSearch WRITE"), es_super = .rs.askForPassword("ElasticSearch WRITE"),
cores = detectCores()) cores = detectCores(), ver)
} }
\arguments{ \arguments{
\item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)} \item{out}{Does not need to be defined explicitly! (is already parsed in the elasticizer function)}
@ -18,6 +18,8 @@ ud_update(out, localhost = T, udmodel,
\item{es_super}{Password for write access to ElasticSearch} \item{es_super}{Password for write access to ElasticSearch}
\item{cores}{Number of cores to use for parallel processing, defaults to detectCores() (all cores available)} \item{cores}{Number of cores to use for parallel processing, defaults to detectCores() (all cores available)}
\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (e.g. for a udpipe update this string might be 'udV2')}
} }
\value{ \value{
A vector of 1's indicating the success of each update call A vector of 1's indicating the success of each update call

Loading…
Cancel
Save