From 755a58d84d5033e40c7c1aafa0f14e399bf5ecb1 Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Wed, 28 Nov 2018 16:52:05 +0100 Subject: [PATCH] dupe_detect: fix to prevent errors when a query returns no results --- R/dupe_detect.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/dupe_detect.R b/R/dupe_detect.R index 38d29f0..315c2c0 100644 --- a/R/dupe_detect.R +++ b/R/dupe_detect.R @@ -32,13 +32,11 @@ dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, words out <- elasticizer(query, es_pwd = es_pwd) - dfm <- dfm_gen(out, text = "full", words = words) - simil <- as.matrix(textstat_simil(dfm, margin="documents", method="cosine")) - diag(simil) <- NA - df <- as.data.frame(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE)) - - if (length(rownames(df)) > 0) { - df <- df %>% + if (out$hits$total > 0) { + dfm <- dfm_gen(out, text = "full", words = words) + simil <- as.matrix(textstat_simil(dfm, margin="documents", method="cosine")) + diag(simil) <- NA + df <- as.data.frame(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE)) %>% rownames_to_column("rowid") %>% mutate(colid = colnames(simil)[col]) %>% .[,c(1,4)] %>%