actor_fetcher: added handling of exception where all actor ids related to a party are individual actors

master
Erik de Vries 5 years ago
parent 526270900c
commit 9e433ecf9e

@ -36,26 +36,31 @@ actor_fetcher <- function(out, sent_dict = NULL, cores = 1, localhost = NULL, va
# Ids for this party: `pid` combined with the '_f' and '_s' suffixes.
p_ids <- c(str_c(pid, '_f'), str_c(pid, '_s'))
### Party ids including actors
p_ids_a <- c(p_ids, str_c(pid, '_a'))

# Collapse every row of `dupe_df` whose `ids` value occurs in `p_ids` into a
# single summary row labelled `merged_id`.
#
# Arguments:
#   p_ids     vector of ids to merge
#   dupe_df   data frame with (at least) the columns used below: `_id`,
#             `_source.doctype`, `_source.publication_date`, ids, prom, occ,
#             sentence_id, rel_first, actor_start, actor_end,
#             sentence_start, sentence_end
#   merged_id id assigned to the merged row
#
# Returns a one-row data frame, or NULL when no row of `dupe_df` matches
# `p_ids` (per the commit message: all actor ids related to the party are
# individual actors).
summarizer <- function(p_ids, dupe_df, merged_id) {
  # Capture the document id BEFORE filtering: once the frame is filtered down
  # to zero rows, `_id`[[1]] fails with "subscript out of bounds".
  doc_id <- if (nrow(dupe_df) > 0) dupe_df$`_id`[[1]] else NA_character_

  matched <- dupe_df %>%
    filter(ids %in% p_ids)

  if (nrow(matched) == 0) {
    message("summarizer: no rows to merge into ", merged_id,
            " for document ", doc_id)
    return(NULL)
  }

  # NOTE: the order of the expressions matters. summarise() makes each new
  # column visible to the expressions after it, so `prom` has to be computed
  # while `occ` and `prom` still refer to the original columns.
  matched %>%
    summarise(
      `_id` = first(`_id`),
      `_source.doctype` = first(`_source.doctype`),
      `_source.publication_date` = first(`_source.publication_date`),
      # round(occ / prom) presumably recovers the number of sentences in the
      # document -- TODO confirm against where prom is first computed.
      prom = list(length(unique(unlist(sentence_id))) / round(occ[[1]] / prom[[1]])),
      sentence_id = list(sort(unique(unlist(sentence_id)))),
      rel_first = list(max(unlist(rel_first))),
      ids = merged_id,
      occ = list(length(unique(unlist(sentence_id)))),
      first = list(min(unlist(sentence_id))),
      actor_start = list(sort(unique(unlist(actor_start)))),
      actor_end = list(sort(unique(unlist(actor_end)))),
      sentence_start = list(sort(unique(unlist(sentence_start)))),
      sentence_end = list(sort(unique(unlist(sentence_end))))
    )
}

party <- summarizer(p_ids, dupe_df, str_c(pid, '_mfs'))
party_actor <- summarizer(p_ids_a, dupe_df, str_c(pid, '_mfsa'))

Loading…
Cancel
Save