From 8ff409730437b54b85ef0454353b8dd857c2647d Mon Sep 17 00:00:00 2001
From: Erik de Vries <erik.devries@uis.no>
Date: Wed, 21 Oct 2020 13:50:15 +0200
Subject: [PATCH] renamed actor_merger to sent_merger and implemented fixes to
 work with sentiment data frames without actor ids

---
 R/{actor_merger.R => sent_merger.R} | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)
 rename R/{actor_merger.R => sent_merger.R} (92%)

diff --git a/R/actor_merger.R b/R/sent_merger.R
similarity index 92%
rename from R/actor_merger.R
rename to R/sent_merger.R
index 8eab0f0..806ca74 100644
--- a/R/actor_merger.R
+++ b/R/sent_merger.R
@@ -1,15 +1,15 @@
-#' Aggregate sentence-level dataset containing actors (from sentencizer())
+#' Aggregate sentence-level dataset containing sentiment (from sentencizer())
 #'
-#' Aggregate sentence-level dataset containing actors (from sentencizer())
+#' Aggregate sentence-level dataset containing sentiment (from sentencizer())
 #' @param df Data frame with actor ids, produced by sentencizer
-#' @param actors_meta Data frame containing actor metadata obtained using elasticizer(index="actors")
+#' @param actors_meta Optional data frame containing actor metadata obtained using elasticizer(index="actors")
 #' @param actor_groups Optional list of vectors, where each vector contains actor ids to be merged (e.g. merge all left-wing parties)
 #' @param pos_cutoff Optional value above which sentence-level sentiment scores should be considered "positive"
 #' @param neg_cutoff Optional value below which sentence-level sentiment scores should be considered "negative"
 #' @return When no ids, returns actor-article dataset with individual actors, party aggregations, party-actor aggregations and overall actor sentiment (regardless of specific actors). When ids, returns aggregations for each vector in list
 #' @export
 #' @examples
-#' actor_merger(df, actors_meta, ids = NULL)
+#' sent_merger(df, actors_meta, actor_groups = NULL)
 #################################################################################################
 #################################### Generate actor-article dataset #############################
 #################################################################################################
@@ -18,7 +18,7 @@
 ### some individual actors, where the partyId of an individual actor doesn't match an actual
 ### partyId in the actor dataset
 
-actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff = NULL, neg_cutoff = NULL) {
+sent_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff = NULL, neg_cutoff = NULL) {
   grouper <- function(id2, df) {
     if ('P_1206_a' %in% id2) {
       id2 <- c('P_212_a','P_1771_a',id2)
@@ -61,9 +61,18 @@ actor_merger <- function(df, actors_meta = NULL, actor_groups = NULL, pos_cutoff
   }
 
   ## Unnest to sentence level
-  df <- df[,lapply(.SD, unlist, recursive=F),
-           .SDcols = c('sentence_id', 'sent_sum', 'words', 'sent_words','ids'),
-           by = list(id,publication_date,doctype)]
+
+  ## Check if raw sentiment data contains actor ids
+  if ('ids' %in% colnames(df)) {
+    df <- df[,lapply(.SD, unlist, recursive=F),
+             .SDcols = c('sentence_id', 'sent_sum', 'words', 'sent_words','ids'),
+             by = list(id,publication_date,doctype)]
+  } else {
+    df <- df[,lapply(.SD, unlist, recursive=F),
+             .SDcols = c('sentence_id', 'sent_sum', 'words', 'sent_words'),
+             by = list(id,publication_date,doctype)]
+  }
+
   df <- df[,.(
     (.SD),
     sent = sent_sum/words