actor_fetcher: added option for using dictionaries with just lemmas, besides the option of using lemma_upos dictionaries

master
Your Name 4 years ago
parent 057d225a7a
commit 8eedec8bb5

@ -2,7 +2,7 @@
#' #'
#' Generate actor data frames (with sentiment) from database #' Generate actor data frames (with sentiment) from database
#' @param out Data frame produced by elasticizer #' @param out Data frame produced by elasticizer
#' @param sent_dict Optional dataframe containing the sentiment dictionary (see sentiment paper scripts for details on format) #' @param sent_dict Optional dataframe containing the sentiment dictionary and values. Words should be either in the "lem_u" column when they consist of lemma_upos pairs, or in the "lemma" column when they are just lemmas. The "prox" column should either contain word values, or NAs if not applicable.
#' @param actor_ids Optional vector containing the actor ids to be collected #' @param actor_ids Optional vector containing the actor ids to be collected
#' @param cores Number of threads to use for parallel processing #' @param cores Number of threads to use for parallel processing
#' @param validation Boolean indicating whether human validation should be performed on sentiment scoring #' @param validation Boolean indicating whether human validation should be performed on sentiment scoring
@ -77,10 +77,15 @@ actor_fetcher <- function(out, sent_dict = NULL, actor_ids = NULL, cores = 1, lo
select(-one_of('exists')) %>% select(-one_of('exists')) %>%
unnest() %>% unnest() %>%
filter(upos != 'PUNCT') %>% # For getting proper word counts filter(upos != 'PUNCT') %>% # For getting proper word counts
mutate(lem_u = str_c(lemma,'_',upos)) %>% if ("lem_u" %in% colnames(sent_dict)) {
left_join(sent_dict, by = 'lem_u') %>% ud_sent <- ud_sent %>%
# ### Setting binary sentiment as unit of analysis mutate(lem_u = str_c(lemma,'_',upos)) %>%
# mutate(prox = V3) %>% left_join(sent_dict, by = 'lem_u')
} else if ("lemma" %in% colnames(sent_dict)) {
ud_sent <- ud_sent %>%
left_join(sent_dict, by = 'lemma')
}
ud_sent <- ud_sent %>%
group_by(sentence_id) %>% group_by(sentence_id) %>%
mutate( mutate(
prox = case_when( prox = case_when(

Loading…
Cancel
Save