diff --git a/R/dupe_detect.R b/R/dupe_detect.R index 011fd4e..0694057 100644 --- a/R/dupe_detect.R +++ b/R/dupe_detect.R @@ -21,7 +21,7 @@ dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super, words, localhost = T, ver) { params <- grid[row,] print(paste0('Parsing ',params$doctypes,' on ',params$dates )) - query <- paste0('doctype:',params$doctypes,' && publication_date:',params$dates,' && !computerCodes._delete:1') + query <- paste0('doctype:"',params$doctypes,'" && publication_date:',params$dates,' && !computerCodes._delete:1') out <- elasticizer(query_string(query, fields = c('country','text','title','subtitle','teaser','preteaser')), es_pwd = es_pwd, localhost= localhost) if (class(out$hits$hits) != 'list') { dfm <- dfm_gen(out, text = "full", words = words, clean = T)