diff --git a/R/actor_merger.R b/R/actor_merger.R index 5032108..a5eb5f5 100644 --- a/R/actor_merger.R +++ b/R/actor_merger.R @@ -31,12 +31,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff } return(df[ids %in% id2,] %>% .[!duplicated(.,by = c('id','sentence_id')),.( - actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date), @@ -83,30 +81,14 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff )] text_sent <- df[, - .(text.sent = sum(sent_sum)/sum(words), - text.sent_binary = sum(sent_binary_weighted)/sum(words), - text.sent_sum = sum(sent_sum), + .(text.sent = sum(sent_binary_weighted)/sum(words), text.sent_words = sum(sent_words), text.words = sum(words), - text.arousal = sum(sent_words)/sum(words), + text.arousal = sum(abs(sent_binary_weighted))/sum(words), text.sentences = .N, doctype = first(doctype), publication_date = first(publication_date) ), by = list(id)] -# -# test2 <- text_sent %>% mutate( -# sent_bin = case_when( -# text.sent > pos_cutoff ~ 1, -# text.sent == 0 ~ 0, -# text.sent >= neg_cutoff & text.sent <= pos_cutoff ~ 0, -# TRUE ~ -1 -# ), -# sent_bin_weighted = case_when( -# text.sent_binary > 0 ~ 1, -# text.sent_binary < 0 ~ -1, -# TRUE ~ 0 -# ) -# ) ## Create aggregations according to list of actorId vectors in ids if(!is.null(actor_groups)) { @@ -124,47 +106,39 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff return(output) } else if(!is.null(actors_meta)) { text_noactors <- df[lengths(ids) == 0L, - .(noactor.sent = sum(sent_sum)/sum(words), - noactor.sent_binary = sum(sent_binary_weighted)/sum(words), - noactor.sent_sum = sum(sent_sum), + .(noactor.sent = sum(sent_binary_weighted)/sum(words), noactor.sent_words = sum(sent_words), noactor.words = sum(words), - noactor.arousal = sum(sent_words)/sum(words), + noactor.arousal = sum(abs(sent_binary_weighted))/sum(words), noactor.first = first(sentence_id), noactor.occ = .N ), by = list(id)] all <- df[lengths(ids) > 0L, - .(actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + .(actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date), ids = 'all'), by = list(id)] all_ind <- df[str_detect(ids, 'A_'), - .(actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + .(actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date), ids = 'ind'), by = list(id)] all_par <- df[str_detect(ids, '_f|_s'), - .(actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + .(actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date), @@ -177,12 +151,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff ## Create aggregate measures for individual actors actors <- df[str_starts(ids, 'A_'), - .(actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + .(actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date)), by = list(id, ids)] @@ -219,12 +191,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff TRUE ~ ids), (.SD) ), .SDcols = -c('ids')][,.( - actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date) @@ -236,12 +206,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff ids = str_sub(ids, start = 1, end = -3), (.SD) ),.SDcols = -c('ids')][,.( - actor.sent = sum(sent_sum)/sum(words), - actor.sent_binary = sum(sent_binary_weighted)/sum(words), - actor.sent_sum = sum(sent_sum), + actor.sent = sum(sent_binary_weighted)/sum(words), actor.sent_words = sum(sent_words), actor.words = sum(words), - actor.arousal = sum(sent_words)/sum(words), + actor.arousal = sum(abs(sent_binary_weighted))/sum(words), actor.first = first(sentence_id), actor.occ = .N, publication_date = first(publication_date)