dupe_detect: fix to query string for multi-word doctype names

master
Your Name 4 years ago
parent 5bd36dcb44
commit 2a220ded5d

@@ -21,7 +21,7 @@
dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super, words, localhost = T, ver) {
params <- grid[row,]
print(paste0('Parsing ',params$doctypes,' on ',params$dates ))
query <- paste0('doctype:',params$doctypes,' && publication_date:',params$dates,' && !computerCodes._delete:1')
query <- paste0('doctype:"',params$doctypes,'" && publication_date:',params$dates,' && !computerCodes._delete:1')
out <- elasticizer(query_string(query, fields = c('country','text','title','subtitle','teaser','preteaser')), es_pwd = es_pwd, localhost= localhost)
if (class(out$hits$hits) != 'list') {
dfm <- dfm_gen(out, text = "full", words = words, clean = T)

Loading…
Cancel
Save