actorizer: updated to data.table for conditional joins

DESCRIPTION: added data.table dependency
master
Your Name 4 years ago
parent 085855908c
commit 69d4b6f5b0

@ -16,7 +16,8 @@ Depends: R (>= 3.3.1),
udpipe, udpipe,
SparseM, SparseM,
future, future,
future.apply future.apply,
data.table (>=1.9.8)
License: Copyright Erik de Vries License: Copyright Erik de Vries
Encoding: UTF-8 Encoding: UTF-8
LazyData: true LazyData: true

@ -50,19 +50,12 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t
mutate( mutate(
sentence_count = n() sentence_count = n()
) )
hits <- as.data.table(ud)[as.data.table(markers), .(`_id`, lemma,x.start, start, end, x.end, sentence_id, merged), on =.(`_id` = `_id`, start <= marker_start, end >= marker_start)] %>%
hits <- left_join(ud, markers, by='_id') %>% mutate(end = x.end,
mutate( start = x.start) %>%
actor = case_when( select(`_id`, sentence_id, start, end,merged) %>%
start <= marker_start & end >= marker_start ~ T,
T ~ F
)
) %>%
select(`_id`, sentence_id, start, end,actor,merged) %>%
filter(actor) %>%
group_by(`_id`,sentence_id) %>% group_by(`_id`,sentence_id) %>%
summarise( summarise(
actor = any(actor),
actor_start = I(list(start)), actor_start = I(list(start)),
actor_end = I(list(end)), actor_end = I(list(end)),
n_markers = length(start), n_markers = length(start),

Loading…
Cancel
Save