|
|
|
@ -23,24 +23,6 @@
|
|
|
|
|
#################################### Aggregate actor results ################################
|
|
|
|
|
#################################################################################################
|
|
|
|
|
actor_aggregation <- function(row, actors, es_pwd, localhost, default_operator = 'OR') {
|
|
|
|
|
actor <- actors[row,]
|
|
|
|
|
if (actor$`_source.function` == "Party"){
|
|
|
|
|
years = seq(2000,2019,1)
|
|
|
|
|
} else {
|
|
|
|
|
years = c(0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (actor$`_source.function` == 'Party' && actor$party_only == T) {
|
|
|
|
|
actorids <- c(paste0(actor$`_source.partyId`,'_s'), paste0(actor$`_source.partyId`,'_f'))
|
|
|
|
|
} else if (actor$`_source.function` == 'Party') {
|
|
|
|
|
actorids <- c(paste0(actor$`_source.partyId`,'_s'), paste0(actor$`_source.partyId`,'_f'), paste0(actor$`_source.partyId`,'_a'))
|
|
|
|
|
actor$party_only <- F
|
|
|
|
|
} else {
|
|
|
|
|
actorids <- actor$`_source.actorId`
|
|
|
|
|
actor$party_only <- NULL
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
actor_aggregator <- function(year, query, actor, actorids, default_operator, localhost = F, es_pwd) {
|
|
|
|
|
### Functions
|
|
|
|
|
aggregator <- function (id, duplicates) {
|
|
|
|
|
article <- filter(duplicates, `_id` == id) %>%
|
|
|
|
@ -59,6 +41,52 @@ actor_aggregation <- function(row, actors, es_pwd, localhost, default_operator =
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
### Creating aggregate measuers at daily, weekly, monthly and yearly level
|
|
|
|
|
grouper <- function(level, actor_df, actorids) {
|
|
|
|
|
by_newspaper <- actor_df %>% group_by_at(vars(level, `_source.doctype`)) %>%
|
|
|
|
|
summarise(
|
|
|
|
|
occ = mean(unlist(occ)),
|
|
|
|
|
prom = mean(unlist(prom)),
|
|
|
|
|
rel_first = mean(unlist(rel_first)),
|
|
|
|
|
first = mean(unlist(first)),
|
|
|
|
|
articles = length(`_id`),
|
|
|
|
|
level = level
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
aggregate <- actor_df %>% group_by_at(vars(level)) %>%
|
|
|
|
|
summarise(
|
|
|
|
|
occ = mean(unlist(occ)),
|
|
|
|
|
prom = mean(unlist(prom)),
|
|
|
|
|
rel_first = mean(unlist(rel_first)),
|
|
|
|
|
first = mean(unlist(first)),
|
|
|
|
|
articles = length(`_id`),
|
|
|
|
|
`_source.doctype` = 'agg',
|
|
|
|
|
level = level
|
|
|
|
|
)
|
|
|
|
|
output <- bind_rows(by_newspaper, aggregate) %>%
|
|
|
|
|
bind_cols(.,bind_rows(actor)[rep(seq_len(nrow(bind_rows(actor))), each=nrow(.)),])
|
|
|
|
|
return(output)
|
|
|
|
|
}
|
|
|
|
|
###########################################################################################
|
|
|
|
|
actor <- actors[row,]
|
|
|
|
|
if (actor$`_source.function` == "Party"){
|
|
|
|
|
years = seq(2000,2019,1)
|
|
|
|
|
} else {
|
|
|
|
|
years = c(0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (actor$`_source.function` == 'Party' && actor$party_only == T) {
|
|
|
|
|
actorids <- c(paste0(actor$`_source.partyId`,'_s'), paste0(actor$`_source.partyId`,'_f'))
|
|
|
|
|
} else if (actor$`_source.function` == 'Party') {
|
|
|
|
|
actorids <- c(paste0(actor$`_source.partyId`,'_s'), paste0(actor$`_source.partyId`,'_f'), paste0(actor$`_source.partyId`,'_a'))
|
|
|
|
|
actor$party_only <- F
|
|
|
|
|
} else {
|
|
|
|
|
actorids <- actor$`_source.actorId`
|
|
|
|
|
actor$party_only <- NULL
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
actor_aggregator <- function(year, query, actor, actorids, default_operator, localhost = F, es_pwd) {
|
|
|
|
|
if (year > 0) {
|
|
|
|
|
query <- paste0('computerCodes.actors:(',paste(actorids, collapse = ' '),') && publication_date:[',year,'-01-01 TO ',year,'-12-31] && computerCodes.junk:0')
|
|
|
|
|
} else {
|
|
|
|
@ -69,8 +97,9 @@ actor_aggregation <- function(row, actors, es_pwd, localhost, default_operator =
|
|
|
|
|
localhost = localhost,
|
|
|
|
|
es_pwd = es_pwd)
|
|
|
|
|
if (length(out$`_id`) > 0 ) {
|
|
|
|
|
actor_df <- out
|
|
|
|
|
### Generating actor dataframe, unnest by actorsDetail, then by actor ids. Filter out non-relevant actor ids.
|
|
|
|
|
actor_df <- out %>%
|
|
|
|
|
actor_df <- actor_df %>%
|
|
|
|
|
unnest() %>%
|
|
|
|
|
unnest(ids, .preserve = colnames(.)) %>%
|
|
|
|
|
filter(ids1 %in% actorids) %>%
|
|
|
|
@ -86,7 +115,6 @@ actor_aggregation <- function(row, actors, es_pwd, localhost, default_operator =
|
|
|
|
|
dupe_merged <- bind_rows(lapply(art_id, aggregator, duplicates = duplicates))
|
|
|
|
|
actor_df <- bind_rows(dupe_merged, actor_single)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
### Creating date grouping variables
|
|
|
|
|
actor_df <- actor_df %>%
|
|
|
|
|
mutate(
|
|
|
|
@ -95,34 +123,8 @@ actor_aggregation <- function(row, actors, es_pwd, localhost, default_operator =
|
|
|
|
|
yearmonthday = strftime(actor_df$`_source.publication_date`, format = '%Y%m%d'),
|
|
|
|
|
yearweek = strftime(actor_df$`_source.publication_date`, format = "%Y%V")
|
|
|
|
|
)
|
|
|
|
|
### Creating aggregate measuers at daily, weekly, monthly and yearly level
|
|
|
|
|
grouper <- function(level) {
|
|
|
|
|
by_newspaper <- actor_df %>% group_by_at(vars(level, `_source.doctype`)) %>%
|
|
|
|
|
summarise(
|
|
|
|
|
occ = mean(unlist(occ)),
|
|
|
|
|
prom = mean(unlist(prom)),
|
|
|
|
|
rel_first = mean(unlist(rel_first)),
|
|
|
|
|
first = mean(unlist(first)),
|
|
|
|
|
articles = length(`_id`),
|
|
|
|
|
level = level
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
aggregate <- actor_df %>% group_by_at(vars(level)) %>%
|
|
|
|
|
summarise(
|
|
|
|
|
occ = mean(unlist(occ)),
|
|
|
|
|
prom = mean(unlist(prom)),
|
|
|
|
|
rel_first = mean(unlist(rel_first)),
|
|
|
|
|
first = mean(unlist(first)),
|
|
|
|
|
articles = length(`_id`),
|
|
|
|
|
`_source.doctype` = 'agg',
|
|
|
|
|
level = level
|
|
|
|
|
)
|
|
|
|
|
output <- bind_rows(by_newspaper, aggregate) %>%
|
|
|
|
|
bind_cols(.,bind_rows(actor)[rep(seq_len(nrow(bind_rows(actor))), each=nrow(.)),])
|
|
|
|
|
return(output)
|
|
|
|
|
}
|
|
|
|
|
levels <- c('year','yearmonth','yearmonthday','yearweek')
|
|
|
|
|
aggregate_data <- bind_rows(lapply(levels, grouper))
|
|
|
|
|
aggregate_data <- bind_rows(lapply(levels, grouper, actor_df = actor_df, actorids = actorids))
|
|
|
|
|
return(aggregate_data)
|
|
|
|
|
} else {
|
|
|
|
|
return()
|
|
|
|
|