From 9419d6dc084dfecd649202f8bfa7212998fc919e Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Fri, 7 May 2021 15:34:59 +0200 Subject: [PATCH] Fixed incorrect mfs and mfsa aggregations. Previously, multiple party/actor mentions in the same sentence (e.g. both a *_f and a *_s mention) would all be taken into account separately, whereas each sentence should only be considered once. --- R/sent_merger.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/sent_merger.R b/R/sent_merger.R index 9dae3b9..7baaae9 100644 --- a/R/sent_merger.R +++ b/R/sent_merger.R @@ -181,7 +181,7 @@ sent_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff ids %in% str_c('P_',as.character(1630:1647)) ~ 'P_1629', TRUE ~ ids), (.SD) - ), .SDcols = -c('ids')][,.( + ), .SDcols = -c('ids')] %>% .[!duplicated(.,by = c('id','ids','sentence_id')),.( actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), @@ -196,7 +196,7 @@ sent_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff parties <- df[str_ends(ids,'_f') | str_ends(ids,'_s'),.( ids = str_sub(ids, start = 1, end = -3), (.SD) - ),.SDcols = -c('ids')][,.( + ),.SDcols = -c('ids')] %>% .[!duplicated(.,by = c('id','ids','sentence_id')),.( actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words),