actor_fetcher: added handling of the exception where all actor ids related to a party are individual actors

master
Erik de Vries 5 years ago
parent 526270900c
commit 9e433ecf9e

@ -36,26 +36,31 @@ actor_fetcher <- function(out, sent_dict = NULL, cores = 1, localhost = NULL, va
p_ids <- c(str_c(pid,'_f'),str_c(pid,'_s'))
### Party ids including actors
p_ids_a <- c(p_ids,str_c(pid,'_a'))
summarizer <- function (p_ids, out_row, merged_id) {
return(
out_row %>%
filter(ids %in% p_ids) %>%
summarise(
`_id` = first(`_id`),
`_source.doctype` = first(`_source.doctype`),
`_source.publication_date` = first(`_source.publication_date`),
prom = list(length(unique(unlist(sentence_id)))/round(occ[[1]]/prom[[1]])),
sentence_id = list(sort(unique(unlist(sentence_id)))),
rel_first = list(max(unlist(rel_first))),
ids = merged_id,
occ = list(length(unique(unlist(sentence_id)))),
first = list(min(unlist(sentence_id))),
actor_start = list(sort(unique(unlist(actor_start)))),
actor_end = list(sort(unique(unlist(actor_end)))),
sentence_start = list(sort(unique(unlist(sentence_start)))),
sentence_end = list(sort(unique(unlist(sentence_end))))
# Collapse the rows of `dupe_df` whose `ids` value is in `p_ids` into a single
# summary row labelled `merged_id`.
#
# Arguments:
#   p_ids     - vector of ids to merge (matched against the `ids` column).
#   dupe_df   - data frame holding at least the columns referenced below
#               (`_id`, `_source.doctype`, `_source.publication_date`, `prom`,
#               `occ`, `ids`, `sentence_id`, `rel_first`, `actor_start`,
#               `actor_end`, `sentence_start`, `sentence_end`).
#   merged_id - value written to the `ids` column of the merged row.
#
# Returns a one-row data frame, or NULL when no row of `dupe_df` matches
# `p_ids`. In the NULL case the first `_id` of the unfiltered input is printed
# as a diagnostic.
summarizer <- function(p_ids, dupe_df, merged_id) {
  # Capture the ids BEFORE filtering: once the filter has dropped every row,
  # `_id` is empty and `[[1]]` on it fails with "subscript out of bounds".
  doc_ids <- dupe_df$`_id`
  matched <- filter(dupe_df, ids %in% p_ids)

  if (nrow(matched) == 0) {
    # Nothing to merge for this id set; report which input was affected
    # (when there is anything to report) and let the caller handle NULL.
    if (length(doc_ids) > 0) {
      print(doc_ids[[1]])
    }
    return(NULL)
  }

  matched %>%
    summarise(
      `_id` = first(`_id`),
      `_source.doctype` = first(`_source.doctype`),
      `_source.publication_date` = first(`_source.publication_date`),
      # Must stay first among prom/sentence_id/occ: summarise evaluates its
      # expressions in order, so here `occ` and `prom` are still the input
      # columns. NOTE(review): round(occ / prom) presumably recovers the
      # sentence count of the document - confirm against the caller.
      prom = list(length(unique(unlist(sentence_id)))/round(occ[[1]]/prom[[1]])),
      sentence_id = list(sort(unique(unlist(sentence_id)))),
      rel_first = list(max(unlist(rel_first))),
      ids = merged_id,
      occ = list(length(unique(unlist(sentence_id)))),
      first = list(min(unlist(sentence_id))),
      actor_start = list(sort(unique(unlist(actor_start)))),
      actor_end = list(sort(unique(unlist(actor_end)))),
      sentence_start = list(sort(unique(unlist(sentence_start)))),
      sentence_end = list(sort(unique(unlist(sentence_end))))
    )
}
party <- summarizer(p_ids, dupe_df, str_c(pid,'_mfs'))
party_actor <- summarizer(p_ids_a, dupe_df, str_c(pid,'_mfsa'))

Loading…
Cancel
Save