From d0e9bf565b1227857ecb4f3f308dc6cc5535ace6 Mon Sep 17 00:00:00 2001 From: Erik de Vries Date: Tue, 8 Jan 2019 15:07:40 +0100 Subject: [PATCH] dupe_detect: Reset the _delete value to 1. out_parser: fix sentence parsing by adding an additional (empty) string at the end of the merged field, so that the merged field ends with a period. --- R/dupe_detect.R | 2 +- R/out_parser.R | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/dupe_detect.R b/R/dupe_detect.R index 9aad6e0..63e4e9e 100644 --- a/R/dupe_detect.R +++ b/R/dupe_detect.R @@ -49,7 +49,7 @@ dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_su # file = paste0(getwd(),'/remove_ids.txt'), # append=T) dupe_delete <- data.frame(id=unique(rownames(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE))), - dupe_delete = rep(2,length(unique(rownames(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE)))))) + dupe_delete = rep(1,length(unique(rownames(which(simil >= cutoff_lower & simil <= cutoff_upper, arr.ind = TRUE)))))) bulk <- c(apply(df, 1, bulk_writer, varname='duplicates', type = 'set', ver = ver), apply(dupe_delete, 1, bulk_writer, varname='_delete', type = 'set', ver = ver)) if (length(bulk) > 0) { diff --git a/R/out_parser.R b/R/out_parser.R index f4b93e8..c7fe464 100644 --- a/R/out_parser.R +++ b/R/out_parser.R @@ -39,6 +39,7 @@ out_parser <- function(out, field) { out$highlight.preteaser, out$highlight.teaser, out$highlight.text, + '', sep = ". ") } @@ -54,6 +55,7 @@ out_parser <- function(out, field) { out$`_source.preteaser`, out$`_source.teaser`, out$`_source.text`, + '', sep = ". ") }