actorizer: switched to single core processing for debugging

master
Erik de Vries 6 years ago
parent c63409238b
commit 8caf53b90a

@ -22,7 +22,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
} }
sentencizer <- function(row, out, udmodel, ids, prefix, postfix, identifier) { sentencizer <- function(row, out, udmodel, ids, prefix, postfix, identifier) {
print(row)
### If no pre or postfixes, match *not nothing* i.e. anything ### If no pre or postfixes, match *not nothing* i.e. anything
if (is.na(prefix) || prefix == '') { if (is.na(prefix) || prefix == '') {
prefix = '$^' prefix = '$^'
@ -32,6 +32,8 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
} }
### Also needs fix for empty strings (non-NA) ### Also needs fix for empty strings (non-NA)
doc <- out[row,] doc <- out[row,]
print(doc$merged)
print(row)
ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "none", doc_id = doc$`_id`)) %>% ud <- as.data.frame(udpipe_annotate(udmodel, x = doc$merged, parser = "none", doc_id = doc$`_id`)) %>%
filter(upos != "PUNCT") # Removing punctuation to get accurate word counts filter(upos != "PUNCT") # Removing punctuation to get accurate word counts
sentence_count <- length(unique(ud$sentence)) sentence_count <- length(unique(ud$sentence))
@ -74,7 +76,7 @@ actorizer <- function(out, localhost = F, ids, type, prefix, postfix, identifier
str_replace_all("\\s+"," ") str_replace_all("\\s+"," ")
ids <- fromJSON(ids) ids <- fromJSON(ids)
updates <- bind_rows(mclapply(seq(1,length(out[[1]]),1), sentencizer, out = out, ids = ids, postfix = postfix, prefix=prefix, identifier=identifier, udmodel = udmodel, mc.cores = detectCores())) updates <- bind_rows(mclapply(seq(1,length(out[[1]]),1), sentencizer, out = out, ids = ids, postfix = postfix, prefix=prefix, identifier=identifier, udmodel = udmodel, mc.cores = 1))
bulk <- apply(updates, 1, bulk_writer, varname ='actorsDetail', type = 'add') bulk <- apply(updates, 1, bulk_writer, varname ='actorsDetail', type = 'add')
bulk <- c(bulk,apply(updates[c(1,8)], 1, bulk_writer, varname='actors', type = 'add')) bulk <- c(bulk,apply(updates[c(1,8)], 1, bulk_writer, varname='actors', type = 'add'))
return(elastic_update(bulk, es_super = es_super, localhost = localhost)) return(elastic_update(bulk, es_super = es_super, localhost = localhost))

Loading…
Cancel
Save