actor_fetcher: added lemma output when validating, to detect the most problematic lemmas

master
Erik de Vries 5 years ago
parent 499ee74f0d
commit 84df9658ff

@ -39,20 +39,21 @@ actor_fetcher <- function(out, sent_dict = NULL, cores = 1, localhost = NULL, va
select(-one_of('exists')) %>% select(-one_of('exists')) %>%
unnest() %>% unnest() %>%
filter(upos != 'PUNCT') %>% # For getting proper word counts filter(upos != 'PUNCT') %>% # For getting proper word counts
mutate(V1 = str_c(lemma,'_',upos)) %>% mutate(lem_u = str_c(lemma,'_',upos)) %>%
left_join(sent_dict, by = 'V1') %>% left_join(sent_dict, by = 'lem_u') %>%
# ### Setting binary sentiment as unit of analysis # ### Setting binary sentiment as unit of analysis
# mutate(V2 = V3) %>% # mutate(prox = V3) %>%
group_by(sentence_id) %>% group_by(sentence_id) %>%
mutate( mutate(
V2 = case_when( prox = case_when(
is.na(V2) == T ~ 0, is.na(prox) == T ~ 0,
TRUE ~ V2 TRUE ~ prox
) )
) %>% ) %>%
summarise(sent_sum = sum(V2), summarise(sent_sum = sum(prox),
words = length(lemma), words = length(lemma),
sent_words = length(na.omit(V3))) %>% sent_words = sum(prox != 0),
sent_lemmas = list(lem_u[prox != 0])) %>%
mutate( mutate(
sent = sent_sum/words, sent = sent_sum/words,
arousal = sent_words/words arousal = sent_words/words

Loading…
Cancel
Save