You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mamlr/R/aggregator_elastic.R

62 lines
3.0 KiB

### Notes:
# Do you want to search for either one OR the other actorid, or both occurring in the same document?
# Do you want to keep only the occurrences of the actorids you are searching for, or all actor occurrences in the hits?
# Search by actorId, then aggregate by month
# When actorId starts with P_, define what hits you want to get (short, full, actor), if more than one, aggregate properly
# Develop query generator for specific actors (i.e. combine actorId with start and end dates)
#' Generate and store aggregate actor measures to elasticsearch
#'
#' Restricts the actor annotations in `out` to the requested `actorids`,
#' aggregates them per document at three levels (party and actors combined,
#' party only, actors only) and submits the result as an `actorsDetail`
#' update through `elastic_update()`.
#'
#' @param out The output provided by elasticizer(). Must contain an `_id`
#'   column and nested data that unnests to an `ids` list column.
#' @param localhost Boolean indicating if the script should run locally, or remote
#' @param es_super Write password for ES
#' @param actorids List of actorids used in the search, should be the same as
#'   the actorids used for elasticizer(). The first entry, minus its last two
#'   characters, is used as the party id prefix for the generated merge ids.
#' @param ver String indicating the version of the update
#' @return Return value is based on output of elastic_update()
#' @export
#' @examples
#' \dontrun{
#' aggregator_elastic(out, localhost = FALSE, actorids, ver, es_super)
#' }
#################################################################################################
#################################### Aggregate actor results ################################
#################################################################################################
aggregator_elastic <- function(out, localhost = FALSE, actorids, ver, es_super) {
  # Party id prefix: first actor id with its last two characters stripped.
  partyid <- str_sub(actorids[1], end = -3)

  ### Generating actor dataframe, unnest by actorsDetail, then by actor ids.
  ### Filter out non-relevant actor ids.
  # NOTE(review): this relies on the legacy tidyr unnest() interface (bare
  # unnest() and `.preserve`); `ids1` is presumably the unnested copy of the
  # preserved `ids` column -- confirm against the installed tidyr version.
  actor_df <- out %>%
    unnest() %>%
    unnest(ids, .preserve = colnames(.)) %>%
    filter(ids1 %in% actorids)

  # Run the package aggregator once per unique document `_id` in `rows` and
  # stack the results; `suffix` is appended to the party id as the merge id.
  aggregate_by_doc <- function(rows, suffix) {
    bind_rows(lapply(
      unique(rows$`_id`),
      mamlr:::aggregator,
      actor_df = rows,
      merge_id = paste0(partyid, suffix)
    ))
  }

  # Ids ending in "_a" are treated as individual actors, everything else as
  # party-level ids.
  party_rows <- actor_df %>%
    filter(!endsWith(ids1, "_a"))
  actor_rows <- actor_df %>%
    filter(endsWith(ids1, "_a"))

  df_out <- bind_rows(
    aggregate_by_doc(actor_df, "_mfsa"),   # party and actors combined
    aggregate_by_doc(party_rows, "_mfs"),  # party only
    aggregate_by_doc(actor_rows, "_ma")    # actors only
  )

  # Group the aggregated rows per document. The first column is dropped
  # before splitting.
  # NOTE(review): select(-1) presumably removes `doc_id`; this depends on the
  # column order returned by the aggregator -- verify.
  doc_ids <- df_out$doc_id
  df_out <- df_out %>%
    select(-1) %>%
    split(as.factor(doc_ids))

  # One row per document, with that document's aggregated rows held in a list
  # column (I() keeps data.frame() from expanding the list).
  df_out <- data.frame(doc_id = names(df_out), list = I(df_out))

  # Build one bulk update entry per document and send them off.
  bulk <- apply(df_out, 1, bulk_writer, varname = "actorsDetail", type = "add", ver = ver)
  elastic_update(bulk, es_super = es_super, localhost = localhost)
}