diff --git a/R/dfm_gen.R b/R/dfm_gen.R
index 8f66735..a55cd27 100644
--- a/R/dfm_gen.R
+++ b/R/dfm_gen.R
@@ -31,18 +31,13 @@ dfm_gen <- function(out, words = '999', text = "lemmas", clean, cores = detectCo
   }
   if ('_source.codes.majorTopic' %in% colnames(out)) {
     out <- out %>%
-      mutate(codes = case_when(
-        .$`_source.codes.timeSpent` == -1 ~ NA_character_,
-        TRUE ~ .$`_source.codes.majorTopic`
-      )
-      ) %>%
       mutate(junk = case_when(
-        .$codes == 2301 ~ 1,
-        .$codes == 3101 ~ 1,
-        .$codes == 34 ~ 1,
-        .$`_source.codes.timeSpent` == -1 ~ NA_real_,
-        TRUE ~ 0
-      )
+          .$codes == 2301 ~ 1,
+          .$codes == 3101 ~ 1,
+          .$codes == 34 ~ 1,
+          .$`_source.codes.timeSpent` == -1 ~ NA_real_,
+          TRUE ~ 0
+        )
       ) %>%
       mutate(aggregate = .$codes %>%
                str_pad(4, side="right", pad="a") %>%
diff --git a/R/modelizer.R b/R/modelizer.R
index a39d734..b6df864 100644
--- a/R/modelizer.R
+++ b/R/modelizer.R
@@ -256,8 +256,15 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
     }
   }
 
-  ## Generate nested CV folds, based on number of inner and outer folds defined (see start of script)
-  folds <- generate_folds(outer_k,inner_k = inner_k, dfm = dfm, class_type = class_type)
+  ### If outer_k is 1, do a holdout training run, with only cross-validation for parameter optimization, else, do nested CV
+  ### If holdout, training/test distribution is the same as for inner CV
+  if (outer_k == 1) {
+    outer_fold <- createDataPartition(as.factor(docvars(dfm, class_type)), p=1-(1/inner_k)*(inner_k-1))
+    folds <- lapply(outer_fold,inner_loop, dfm = dfm, inner_k = inner_k, class_type = class_type)
+  } else {
+    ## Generate nested CV folds, based on number of inner and outer folds defined (see start of script)
+    folds <- generate_folds(outer_k,inner_k = inner_k, dfm = dfm, class_type = class_type)
+  }
 
   ## Get performance of each outer fold validation, and add row with mean scores (This is the final performance indicator)
   performance <- mclapply(folds, outer_cv, grid=grid, dfm=dfm, class_type=class_type, model=model, cores_grid=cores_grid, cores_inner=cores_inner, cores_feats=cores_feats, mc.cores = cores_outer)
diff --git a/R/query_string.R b/R/query_string.R
index b771798..962535f 100644
--- a/R/query_string.R
+++ b/R/query_string.R
@@ -12,7 +12,7 @@
 #################################### Get data from ElasticSearch ################################
 #################################################################################################
 
-query_string <- function(query, fields = F, random = F) {
+query_string <- function(query, fields = F, random = F, default_operator = "AND") {
   if (typeof(fields) == 'logical') {
     fields <- '*'
   }
@@ -28,7 +28,7 @@ query_string <- function(query, fields = F, random = F) {
                   "query_string" : {
                       "default_field" : "text",
                       "query" : "',query,'",
-                      "default_operator": "AND",
+                      "default_operator": "',default_operator,'",
                       "allow_leading_wildcard" : false
                   }
                 }]
@@ -50,7 +50,7 @@ query_string <- function(query, fields = F, random = F) {
             "query_string" : {
                 "default_field" : "text",
                 "query" : "',query,'",
-                "default_operator": "AND",
+                "default_operator": "',default_operator,'",
                 "allow_leading_wildcard" : false
             }
           }]