From b9be372543529e963e281f043f2f747479e4513d Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Fri, 11 Jan 2019 15:23:18 +0100 Subject: [PATCH] dupe_detect: fix to get correct colnames from simil (disable stringsAsFactors and convert col values to numeric) --- R/dupe_detect.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/dupe_detect.R b/R/dupe_detect.R index f03a58f..15ff360 100644 --- a/R/dupe_detect.R +++ b/R/dupe_detect.R @@ -40,10 +40,11 @@ dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_su duplicates <- which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE) duplicates <- cbind(duplicates, rowid= rownames(duplicates)) rownames(duplicates) <- seq(1:length(rownames(duplicates))) - df <- as.data.frame(duplicates, make.names = NA) %>% - mutate(colid = colnames(simil)[col]) %>% + df <- as.data.frame(duplicates, make.names = NA, stringsAsFactors = F) %>% + # bind_cols(colid = colnames(simil)[.['col']]) %>% + mutate(colid = colnames(simil)[as.numeric(col)]) %>% .[,c(3,4)] %>% - group_by(rowid) %>% summarise(colid=list(colid)) + group_by(colid) %>% summarise(rowid=list(rowid)) text <- capture.output(stream_out(df)) # write(text[-length(text)], file = paste0(getwd(),'/dupe_objects.json'), append=T) simil[upper.tri(simil)] <- NA