diff --git a/R/actor_fetcher.R b/R/actor_fetcher.R
index e934574..1619667 100644
--- a/R/actor_fetcher.R
+++ b/R/actor_fetcher.R
@@ -36,26 +36,36 @@ actor_fetcher <- function(out, sent_dict = NULL, cores = 1, localhost = NULL, va
     p_ids <- c(str_c(pid,'_f'),str_c(pid,'_s'))
     ### Party ids including actors
     p_ids_a <- c(p_ids,str_c(pid,'_a'))
-    summarizer <- function (p_ids, out_row, merged_id) {
-      return(
-        out_row %>%
-        filter(ids %in% p_ids) %>%
-        summarise(
-          `_id` = first(`_id`),
-          `_source.doctype` = first(`_source.doctype`),
-          `_source.publication_date` = first(`_source.publication_date`),
-          prom = list(length(unique(unlist(sentence_id)))/round(occ[[1]]/prom[[1]])),
-          sentence_id = list(sort(unique(unlist(sentence_id)))),
-          rel_first = list(max(unlist(rel_first))),
-          ids = merged_id,
-          occ = list(length(unique(unlist(sentence_id)))),
-          first = list(min(unlist(sentence_id))),
-          actor_start = list(sort(unique(unlist(actor_start)))),
-          actor_end = list(sort(unique(unlist(actor_end)))),
-          sentence_start = list(sort(unique(unlist(sentence_start)))),
-          sentence_end = list(sort(unique(unlist(sentence_end))))
+    ### Collapse every row of dupe_df whose ids value is in p_ids into a single
+    ### summary row labelled merged_id; returns NULL (after logging) on no match
+    summarizer <- function (p_ids, dupe_df, merged_id) {
+      matched <- dupe_df %>%
+        filter(ids %in% p_ids)
+      if (nrow(matched) > 0) {
+        return(
+          matched %>% summarise(
+            `_id` = first(`_id`),
+            `_source.doctype` = first(`_source.doctype`),
+            `_source.publication_date` = first(`_source.publication_date`),
+            ### prom is listed before occ so that occ[[1]] is still the input column
+            prom = list(length(unique(unlist(sentence_id)))/round(occ[[1]]/prom[[1]])),
+            sentence_id = list(sort(unique(unlist(sentence_id)))),
+            rel_first = list(max(unlist(rel_first))),
+            ids = merged_id,
+            occ = list(length(unique(unlist(sentence_id)))),
+            first = list(min(unlist(sentence_id))),
+            actor_start = list(sort(unique(unlist(actor_start)))),
+            actor_end = list(sort(unique(unlist(actor_end)))),
+            sentence_start = list(sort(unique(unlist(sentence_start)))),
+            sentence_end = list(sort(unique(unlist(sentence_end))))
+          )
         )
-      )
+      } else {
+        ### matched has zero rows here, so the id is taken from the unfiltered
+        ### input; `[1]` gives NA instead of an error if that is empty as well
+        message("No rows for ", merged_id, " in document ", dupe_df$`_id`[1])
+        return(NULL)
+      }
     }
     party <- summarizer(p_ids, dupe_df, str_c(pid,'_mfs'))
     party_actor <- summarizer(p_ids_a, dupe_df, str_c(pid,'_mfsa'))