You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
26 lines
2.1 KiB
26 lines
2.1 KiB
#' Classifier function for use in combination with the elasticizer function as 'update' parameter (without brackets), see elasticizer documentation for more information
|
|
#'
|
|
#' Classifier function for use in combination with the elasticizer function as 'update' parameter (without brackets), see elasticizer documentation for more information
|
|
#' @param out Does not need to be defined explicitly! (is already parsed in the elasticizer function)
|
|
#' @param localhost Defaults to TRUE. When TRUE, connect to a local Elasticsearch instance on the default port (9200)
|
|
#' @param model_final The classification model (e.g. output from textstat_nb(), svm() or others)
|
|
#' @param dfm_words A dfm containing all the words and only the words used to generate the model (is used for subsetting)
|
|
#' @param varname String containing the variable name to use for the classification result, usually has the format computerCodes.varname
|
|
#' @param text String indicating whether the "merged" field will contain the "full" text, old-style "lemmas" (will be deprecated), or new-style "ud"
|
|
#' @param es_super Password for write access to ElasticSearch
|
|
#' @return As this is a nested function used within elasticizer, there is no return output
|
|
#' @export
|
|
#' @examples
|
|
#' class_update(out, localhost = TRUE, model_final, dfm_words, varname, text = "full", es_super = .rs.askForPassword('ElasticSearch WRITE'))
|
|
#################################################################################################
|
|
#################################### Update any kind of classification ##########################
|
|
#################################################################################################
|
|
class_update <- function(out,
                         localhost = TRUE,
                         model_final,
                         dfm_words,
                         varname,
                         text,
                         es_super = .rs.askForPassword('ElasticSearch WRITE')) {
  # Classify the documents in `out` with `model_final` and pass the
  # predictions on to elastic_update() as bulk update entries.
  #
  # out         Batch of documents; only its `_id` element is read directly
  #             here, the rest is consumed by dfm_gen().
  # localhost   Forwarded unchanged to elastic_update().
  # model_final Fitted model; must have a predict() method accepting `newdata`.
  # dfm_words   Feature set handed to dfm_keep() to subset the generated dfm.
  # varname     Forwarded to bulk_writer() as the target variable name.
  # text        Forwarded to dfm_gen(); required, there is no default.
  # es_super    Forwarded unchanged to elastic_update().
  #
  # Returns (invisibly) whatever elastic_update() returns.
  print('updating')

  # Generate the document-feature matrix for this batch and keep only the
  # features listed in `dfm_words` (fixed, i.e. literal, matching).
  dfm <- dfm_keep(
    dfm_gen(out, text = text),
    dfm_words,
    valuetype = "fixed",
    verbose = TRUE
  )

  # One row per document: its id and the model's prediction for it.
  pred <- data.frame(id = out$`_id`, pred = predict(model_final, newdata = dfm))

  # apply() coerces `pred` to a matrix before iterating, so bulk_writer()
  # receives each row as a named vector with elements "id" and "pred".
  # NOTE(review): that coercion turns numeric predictions into formatted
  # strings; confirm bulk_writer() expects character input.
  bulk <- apply(pred, 1, bulk_writer, varname = varname, type = 'set')

  # Called for its side effect; the result is handed back invisibly so it
  # does not print at top level but can still be captured by a caller.
  invisible(elastic_update(bulk, es_super = es_super, localhost = localhost))
}
|