actorizer: updated to data.table for conditional joins

DESCRIPTION: added data.table dependency
5 years ago · 69d4b6f5b0
parent 085855908c
commit 69d4b6f5b0
2 changed files with 6 additions and 12 deletions
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,8 @@ Depends: R (>= 3.3.1),
    udpipe,
    SparseM,
    future,
-    future.apply
+    future.apply,
+    data.table (>=1.9.8)
 License: Copyright Erik de Vries
 Encoding: UTF-8
 LazyData: true
--- a/R/actorizer.R
+++ b/R/actorizer.R
@@ -50,19 +50,12 @@ actorizer <- function(out, localhost = F, ids, prefix, postfix, pre_tags, post_t
    mutate(
      sentence_count = n()
    )
-
-  hits <- left_join(ud, markers, by='_id') %>%
-    mutate(
-      actor = case_when(
-        start <= marker_start  & end >= marker_start ~ T,
-        T ~ F
-      )
-    ) %>%
-    select(`_id`, sentence_id, start, end,actor,merged) %>%
-    filter(actor) %>%
+  hits <- as.data.table(ud)[as.data.table(markers), .(`_id`, lemma,x.start, start, end, x.end, sentence_id, merged), on =.(`_id` = `_id`, start <= marker_start, end >= marker_start)] %>%
+    mutate(end = x.end,
+           start = x.start) %>%
+    select(`_id`, sentence_id, start, end,merged) %>%
    group_by(`_id`,sentence_id) %>%
    summarise(
-      actor = any(actor),
      actor_start = I(list(start)),
      actor_end = I(list(end)),
      n_markers = length(start),