actor_merger: changed computation of arousal, and removed uninformative variables

master
Your Name 4 years ago
parent 3cdb68b196
commit 955f034e6a

@ -31,12 +31,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
} }
return(df[ids %in% id2,] %>% return(df[ids %in% id2,] %>%
.[!duplicated(.,by = c('id','sentence_id')),.( .[!duplicated(.,by = c('id','sentence_id')),.(
actor.sent = sum(sent_sum)/sum(words), actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date), publication_date = first(publication_date),
@ -83,30 +81,14 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
)] )]
text_sent <- df[, text_sent <- df[,
.(text.sent = sum(sent_sum)/sum(words), .(text.sent = sum(sent_binary_weighted)/sum(words),
text.sent_binary = sum(sent_binary_weighted)/sum(words),
text.sent_sum = sum(sent_sum),
text.sent_words = sum(sent_words), text.sent_words = sum(sent_words),
text.words = sum(words), text.words = sum(words),
text.arousal = sum(sent_words)/sum(words), text.arousal = sum(abs(sent_binary_weighted))/sum(words),
text.sentences = .N, text.sentences = .N,
doctype = first(doctype), doctype = first(doctype),
publication_date = first(publication_date) publication_date = first(publication_date)
), by = list(id)] ), by = list(id)]
#
# test2 <- text_sent %>% mutate(
# sent_bin = case_when(
# text.sent > pos_cutoff ~ 1,
# text.sent == 0 ~ 0,
# text.sent >= neg_cutoff & text.sent <= pos_cutoff ~ 0,
# TRUE ~ -1
# ),
# sent_bin_weighted = case_when(
# text.sent_binary > 0 ~ 1,
# text.sent_binary < 0 ~ -1,
# TRUE ~ 0
# )
# )
## Create aggregations according to list of actorId vectors in ids ## Create aggregations according to list of actorId vectors in ids
if(!is.null(actor_groups)) { if(!is.null(actor_groups)) {
@ -124,47 +106,39 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
return(output) return(output)
} else if(!is.null(actors_meta)) { } else if(!is.null(actors_meta)) {
text_noactors <- df[lengths(ids) == 0L, text_noactors <- df[lengths(ids) == 0L,
.(noactor.sent = sum(sent_sum)/sum(words), .(noactor.sent = sum(sent_binary_weighted)/sum(words),
noactor.sent_binary = sum(sent_binary_weighted)/sum(words),
noactor.sent_sum = sum(sent_sum),
noactor.sent_words = sum(sent_words), noactor.sent_words = sum(sent_words),
noactor.words = sum(words), noactor.words = sum(words),
noactor.arousal = sum(sent_words)/sum(words), noactor.arousal = sum(abs(sent_binary_weighted))/sum(words),
noactor.first = first(sentence_id), noactor.first = first(sentence_id),
noactor.occ = .N noactor.occ = .N
), by = list(id)] ), by = list(id)]
all <- df[lengths(ids) > 0L, all <- df[lengths(ids) > 0L,
.(actor.sent = sum(sent_sum)/sum(words), .(actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date), publication_date = first(publication_date),
ids = 'all'), by = list(id)] ids = 'all'), by = list(id)]
all_ind <- df[str_detect(ids, 'A_'), all_ind <- df[str_detect(ids, 'A_'),
.(actor.sent = sum(sent_sum)/sum(words), .(actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date), publication_date = first(publication_date),
ids = 'ind'), by = list(id)] ids = 'ind'), by = list(id)]
all_par <- df[str_detect(ids, '_f|_s'), all_par <- df[str_detect(ids, '_f|_s'),
.(actor.sent = sum(sent_sum)/sum(words), .(actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date), publication_date = first(publication_date),
@ -177,12 +151,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
## Create aggregate measures for individual actors ## Create aggregate measures for individual actors
actors <- df[str_starts(ids, 'A_'), actors <- df[str_starts(ids, 'A_'),
.(actor.sent = sum(sent_sum)/sum(words), .(actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date)), by = list(id, ids)] publication_date = first(publication_date)), by = list(id, ids)]
@ -219,12 +191,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
TRUE ~ ids), TRUE ~ ids),
(.SD) (.SD)
), .SDcols = -c('ids')][,.( ), .SDcols = -c('ids')][,.(
actor.sent = sum(sent_sum)/sum(words), actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date) publication_date = first(publication_date)
@ -236,12 +206,10 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
ids = str_sub(ids, start = 1, end = -3), ids = str_sub(ids, start = 1, end = -3),
(.SD) (.SD)
),.SDcols = -c('ids')][,.( ),.SDcols = -c('ids')][,.(
actor.sent = sum(sent_sum)/sum(words), actor.sent = sum(sent_binary_weighted)/sum(words),
actor.sent_binary = sum(sent_binary_weighted)/sum(words),
actor.sent_sum = sum(sent_sum),
actor.sent_words = sum(sent_words), actor.sent_words = sum(sent_words),
actor.words = sum(words), actor.words = sum(words),
actor.arousal = sum(sent_words)/sum(words), actor.arousal = sum(abs(sent_binary_weighted))/sum(words),
actor.first = first(sentence_id), actor.first = first(sentence_id),
actor.occ = .N, actor.occ = .N,
publication_date = first(publication_date) publication_date = first(publication_date)

Loading…
Cancel
Save