|
|
@ -21,7 +21,7 @@
|
|
|
|
dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super, words, localhost = T, ver) {
|
|
|
|
dupe_detect <- function(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super, words, localhost = T, ver) {
|
|
|
|
params <- grid[row,]
|
|
|
|
params <- grid[row,]
|
|
|
|
print(paste0('Parsing ',params$doctypes,' on ',params$dates ))
|
|
|
|
print(paste0('Parsing ',params$doctypes,' on ',params$dates ))
|
|
|
|
query <- paste0('doctype:"',params$doctypes,'" && publication_date:',params$dates,' && !computerCodes._delete:1')
|
|
|
|
query <- paste0('doctype:\\"',params$doctypes,'\\" && publication_date:',params$dates,' && !computerCodes._delete:1')
|
|
|
|
out <- elasticizer(query_string(query, fields = c('country','text','title','subtitle','teaser','preteaser')), es_pwd = es_pwd, localhost= localhost)
|
|
|
|
out <- elasticizer(query_string(query, fields = c('country','text','title','subtitle','teaser','preteaser')), es_pwd = es_pwd, localhost= localhost)
|
|
|
|
if (class(out$hits$hits) != 'list') {
|
|
|
|
if (class(out$hits$hits) != 'list') {
|
|
|
|
dfm <- dfm_gen(out, text = "full", words = words, clean = T)
|
|
|
|
dfm <- dfm_gen(out, text = "full", words = words, clean = T)
|
|
|
|