From 2a220ded5d654d74e7fdabf3da31f1540006cad8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 10 Jun 2020 15:06:35 +0200 Subject: [PATCH] dupe_detect: fix to query string for multi-word doctype names --- R/dupe_detect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dupe_detect.R b/R/dupe_detect.R index 011fd4e..0694057 100644 --- a/R/dupe_detect.R +++ b/R/dupe_detect.R @@ -21,7 +21,7 @@ dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super, words, localhost = T, ver) { params <- grid[row,] print(paste0('Parsing ',params$doctypes,' on ',params$dates )) - query <- paste0('doctype:',params$doctypes,' && publication_date:',params$dates,' && !computerCodes._delete:1') + query <- paste0('doctype:"',params$doctypes,'" && publication_date:',params$dates,' && !computerCodes._delete:1') out <- elasticizer(query_string(query, fields = c('country','text','title','subtitle','teaser','preteaser')), es_pwd = es_pwd, localhost= localhost) if (class(out$hits$hits) != 'list') { dfm <- dfm_gen(out, text = "full", words = words, clean = T)