diff --git a/R/elasticizer.R b/R/elasticizer.R
index 86ce8c0..20a33e8 100644
--- a/R/elasticizer.R
+++ b/R/elasticizer.R
@@ -5,7 +5,8 @@
 #' @param src Logical (true/false) indicating whether or not the source of each document should be retrieved
 #' @param index The name of the Elasticsearch index to search through
 #' @param es_pwd The password for Elasticsearch read access
-#' @param size Batch size
+#' @param batch_size Batch size
+#' @param max_batch Maximum number of batches to retrieve
 #' @param update When set, indicates an update function to use on each batch of 1000 articles
 #' @param local Defaults to false. When true, connect to a local Elasticsearch instance on the default port (9200)
 #' @param ... Parameters passed on to the update function
@@ -17,7 +18,7 @@
 #################################################################################################
 #################################### Get data from ElasticSearch ################################
 #################################################################################################
-elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassword("Elasticsearch READ"), size = 1024, update = NULL, localhost = F, ...){
+elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassword("Elasticsearch READ"), batch_size = 1024, max_batch = Inf, update = NULL, localhost = F, ...){
   retries <- 10 ### Number of retries on error
   sleep <- 30 ### Number of seconds between retries
   httr::set_config(httr::config(http_version = 0))
@@ -49,7 +50,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
       }
       attempt <- attempt + 1
       try(
-        res <- Search(index = index, time_scroll="20m",body = query, size = size, raw=T)
+        res <- Search(index = index, time_scroll="20m",body = query, size = batch_size, raw=T)
       )
     }
   }
@@ -62,7 +63,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
       }
       attempt <- attempt + 1
       try(
-        res <- Search(index = index, time_scroll="20m",body = query, size = size, raw=T, source = F)
+        res <- Search(index = index, time_scroll="20m",body = query, size = batch_size, raw=T, source = F)
       )
     }
   }
@@ -72,13 +73,13 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
   } else {
     out <-  jsonlite:::flatten(json$hits$hits)
     total <- json$hits$total
-    hits <- 1
+    hits <- length(json$hits$hits)
     batch <- 1
-    print(paste0('Processing documents ',batch*size-size,' through ',batch*size,' out of ',total,' documents.'))
+    print(paste0('Processing documents ',batch*batch_size-batch_size,' through ',batch*batch_size,' out of ',total,' documents.'))
     if (length(update) > 0){
       update(out, localhost = localhost, ...)
     }
-    while(hits != 0){
+    while(hits > 0 && batch < max_batch ){
       res <- NULL
       attempt <- 0
       while( is.null(res) && attempt <= retries ) {
@@ -94,7 +95,7 @@ elasticizer <- function(query, src = T, index = 'maml', es_pwd = .rs.askForPassw
       hits <- length(json$hits$hits)
       if(hits > 0) {
         batch <- batch+1
-        print(paste0('Processing documents ',batch*size-size,' through ',batch*size,' out of ',total,' documents.'))
+        print(paste0('Processing documents ',batch*batch_size-batch_size,' through ',batch*batch_size,' out of ',total,' documents.'))
         if (length(update) > 0){
           out <-  jsonlite:::flatten(json$hits$hits)
           update(out, localhost = localhost, ...)
diff --git a/R/query_string.R b/R/query_string.R
index d45e7e9..c0f332f 100644
--- a/R/query_string.R
+++ b/R/query_string.R
@@ -1,26 +1,62 @@
 #' Generate a query string query for ElasticSearch
 #'
 #' Generate a query string query for ElasticSearch
-#' @param x Query string in ElasticSearch query string format
+#' @param query Query string in ElasticSearch query string format
+#' @param fields List of field names to return, defaults to all
+#' @param random Return randomized results. Boolean, defaults to FALSE
 #' @return A formatted ElasticSearch query string query
 #' @export
 #' @examples
-#' query_string(x)
+#' query_string(query)
 #################################################################################################
 #################################### Get data from ElasticSearch ################################
 #################################################################################################
 
-query_string <- function(x) {
+query_string <- function(query, fields = F, random = F) {
+  # identical() keeps this sentinel test scalar: `fields == F` is vectorised, so
+  # a multi-field vector made `if` fail (an error since R 4.2.0).
+  if (identical(fields, FALSE)) fields <- '*'
+  if (random == T) {
+    return(paste0(
+      '{
+      "_source": ',toJSON(fields),',
+        "query": {
+          "function_score": {
+            "query": {
+              "bool":{
+                "filter": [{
+                  "query_string" : {
+                      "default_field" : "text",
+                      "query" : "',query,'",
+                      "default_operator": "AND",
+                      "allow_leading_wildcard" : false
+                  }
+                }]
+              }
+            },
+            "random_score": {},
+            "boost_mode": "sum"
+          }
+        }
+      }'
+    ))
+  } else {
   return(paste0(
     '{
-    "query": {
-        "query_string" : {
-            "default_field" : "text",
-            "query" : "',x,'",
-            "default_operator": "AND",
-            "allow_leading_wildcard" : false
+      "_source": ',toJSON(fields),',
+      "query": {
+        "bool":{
+          "filter": [{
+            "query_string" : {
+                "default_field" : "text",
+                "query" : "',query,'",
+                "default_operator": "AND",
+                "allow_leading_wildcard" : false
+            }
+          }]
         }
-    }
-}'
+      }
+    }'
   ))
+  }
 }
diff --git a/man/dupe_detect.Rd b/man/dupe_detect.Rd
index ee2b699..3fa6859 100644
--- a/man/dupe_detect.Rd
+++ b/man/dupe_detect.Rd
@@ -5,7 +5,7 @@
 \title{Get ids of duplicate documents that have a cosine similarity score higher than [threshold]}
 \usage{
 dupe_detect(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super,
-  words, localhost = T)
+  words, localhost = T, ver)
 }
 \arguments{
 \item{row}{Row of grid to parse}
@@ -23,6 +23,8 @@ dupe_detect(row, grid, cutoff_lower, cutoff_upper = 1, es_pwd, es_super,
 \item{words}{Document cutoff point in number of words. Documents are cut off at the last [.?!] before the cutoff (so document will be a little shorter than [words])}
 
 \item{localhost}{Defaults to true. When true, connect to a local Elasticsearch instance on the default port (9200)}
+
+\item{ver}{Short string (preferably a single word/sequence) indicating the version of the updated document (i.e. for a udpipe update this string might be 'udV2')}
 }
 \value{
 dupe_objects.json and data frame containing each id and all its duplicates. remove_ids.txt and character vector with list of ids to be removed. Files are in current working directory
diff --git a/man/elasticizer.Rd b/man/elasticizer.Rd
index bc5df2f..ada666c 100644
--- a/man/elasticizer.Rd
+++ b/man/elasticizer.Rd
@@ -5,8 +5,8 @@
 \title{Generate a data frame out of unparsed Elasticsearch JSON}
 \usage{
 elasticizer(query, src = T, index = "maml",
-  es_pwd = .rs.askForPassword("Elasticsearch READ"), size = 1024,
-  update = NULL, localhost = F, ...)
+  es_pwd = .rs.askForPassword("Elasticsearch READ"), batch_size = 1024,
+  max_batch = Inf, update = NULL, localhost = F, ...)
 }
 \arguments{
 \item{query}{A JSON-formatted query in the Elasticsearch query DSL}
@@ -17,7 +17,9 @@ elasticizer(query, src = T, index = "maml",
 
 \item{es_pwd}{The password for Elasticsearch read access}
 
-\item{size}{Batch size}
+\item{batch_size}{Batch size}
+
+\item{max_batch}{Maximum number of batches to retrieve}
 
 \item{update}{When set, indicates an update function to use on each batch of 1000 articles}
 
diff --git a/man/out_parser.Rd b/man/out_parser.Rd
index 4e804ce..bc4e71d 100644
--- a/man/out_parser.Rd
+++ b/man/out_parser.Rd
@@ -4,12 +4,12 @@
 \alias{out_parser}
 \title{Parse raw text into a single field}
 \usage{
-out_parser(out, type)
+out_parser(out, field)
 }
 \arguments{
 \item{out}{The original output data frame}
 
-\item{type}{Either 'highlight' or '_source', for parsing of the highlighted search result text, or the original source text}
+\item{field}{Either 'highlight' or '_source', for parsing of the highlighted search result text, or the original source text}
 }
 \value{
 a parsed output data frame including the additional column 'merged', containing the merged text
@@ -18,5 +18,5 @@ a parsed output data frame including the additional column 'merged', containing
 Parse raw text into a single field
 }
 \examples{
-out_parser(out,type)
+out_parser(out,field)
 }
diff --git a/man/query_string.Rd b/man/query_string.Rd
index 666c9f4..0c03e2e 100644
--- a/man/query_string.Rd
+++ b/man/query_string.Rd
@@ -4,10 +4,14 @@
 \alias{query_string}
 \title{Generate a query string query for ElasticSearch}
 \usage{
-query_string(x)
+query_string(query, fields = F, random = F)
 }
 \arguments{
-\item{x}{Query string in ElasticSearch query string format}
+\item{query}{Query string in ElasticSearch query string format}
+
+\item{fields}{List of field names to return, defaults to all}
+
+\item{random}{Return randomized results. Boolean, defaults to FALSE}
 }
 \value{
 A formatted ElasticSearch query string query
@@ -16,5 +20,5 @@ A formatted ElasticSearch query string query
 Generate a query string query for ElasticSearch
 }
 \examples{
-query_string(x)
+query_string(query)
 }