dupe_detect: fix to prevent errors when a query returns no results

master
Erik de Vries 6 years ago
parent 887f1aa774
commit 755a58d84d

@@ -32,13 +32,11 @@ dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, words
out <- elasticizer(query, es_pwd = es_pwd) out <- elasticizer(query, es_pwd = es_pwd)
dfm <- dfm_gen(out, text = "full", words = words) if (out$hits$total > 0) {
simil <- as.matrix(textstat_simil(dfm, margin="documents", method="cosine")) dfm <- dfm_gen(out, text = "full", words = words)
diag(simil) <- NA simil <- as.matrix(textstat_simil(dfm, margin="documents", method="cosine"))
df <- as.data.frame(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE)) diag(simil) <- NA
df <- as.data.frame(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE)) %>%
if (length(rownames(df)) > 0) {
df <- df %>%
rownames_to_column("rowid") %>% rownames_to_column("rowid") %>%
mutate(colid = colnames(simil)[col]) %>% mutate(colid = colnames(simil)[col]) %>%
.[,c(1,4)] %>% .[,c(1,4)] %>%

Loading…
Cancel
Save