diff --git a/NAMESPACE b/NAMESPACE
index 0878746..c7b163c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,6 +17,7 @@ export(metric_gen)
 export(modelizer)
 export(modelizer_old)
 export(out_parser)
+export(preproc)
 export(query_gen_actors)
 export(query_string)
 export(ud_update)
diff --git a/R/class_update.R b/R/class_update.R
index 8e51472..596c1bb 100644
--- a/R/class_update.R
+++ b/R/class_update.R
@@ -20,8 +20,10 @@
 #################################################################################################
 class_update <- function(out, localhost = T, model_final, varname, text, words, clean, ver, es_super = .rs.askForPassword('ElasticSearch WRITE'), cores = 1) {
   print('updating')
-  dfm <- dfm_gen(out, text = text, words = words, clean = clean, cores = cores) %>%
-    dfm_weight(weights = model_final$idf)
+  dfm <- dfm_gen(out, text = text, words = words, clean = clean, cores = cores)
+  if (!is.null(model_final$idf)) {
+    dfm <- dfm_weight(dfm, weights = model_final$idf)
+  }
   pred <- data.frame(id = out$`_id`, pred = predict(model_final$text_model, newdata = dfm, type = "class", force = T))
   bulk <- apply(pred, 1, bulk_writer, varname = varname, type = 'set', ver = ver)
   res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
diff --git a/R/estimator.R b/R/estimator.R
index 1242fa0..63ef19d 100644
--- a/R/estimator.R
+++ b/R/estimator.R
@@ -13,7 +13,7 @@
 #' @examples
 #' estimator(row, grid, outer_folds, dfm, class_type, model)
 #################################################################################################
-#################################### Generate CV folds ##########################################
+#################################### Generate models ############################################
 #################################################################################################
 
 ### Classification function
@@ -40,29 +40,16 @@ estimator <- function (row, grid, outer_folds, inner_folds, dfm, class_type, mod
     final <- T ### Indicate final modeling run on whole dataset
     dfm_train <- dfm
   }
-  ## Currently scheme_tf is not used explicitly
-  # if (model == 'nb') {
-  #   scheme_tf <- 'count' # The 'old' way
-  # } else {
-  #   scheme_tf <- 'prop' # The 'new' way
-  # }
-
-  ### Getting features from training dataset
-  # Getting idf from training data, and using it to normalize both training and testing feature occurence
-  dfm_train <- dfm_trim(dfm_train, min_termfreq = 1, min_docfreq = 0)
-  idf <- docfreq(dfm_train, scheme = "inverse", base = 10, smoothing = 0, k = 0, threshold = 0)
-  dfm_train <- dfm_weight(dfm_train, weights = idf)
-
-  # Keeping unique words that are important to one or more categories (see textstat_keyness and feat_select)
-  words <- unique(unlist(lapply(unique(docvars(dfm_train, class_type)),
-                                  feat_select,
-                                  dfm = dfm_train,
-                                  class_type = class_type,
-                                  percentile = params$percentiles,
-                                  measure = params$measures
-  )))
-  dfm_train <- dfm_keep(dfm_train, words, valuetype="fixed", verbose=T)
 
+  if (exists("final")) {
+    preproc_dfm <- preproc(dfm_train, NULL, params)
+    dfm_train <- preproc_dfm$dfm_train
+  } else {
+    preproc_dfm <- preproc(dfm_train, dfm_test, params)
+    dfm_train <- preproc_dfm$dfm_train
+    dfm_test <- preproc_dfm$dfm_test
+  }
+  idf <- preproc_dfm$idf
 
   if (model == "nb") {
     text_model <- textmodel_nb(dfm_train, y = docvars(dfm_train, class_type), smooth = .001, prior = "uniform", distribution = "multinomial")
@@ -80,7 +67,6 @@ estimator <- function (row, grid, outer_folds, inner_folds, dfm, class_type, mod
   if (exists("final")) {
     return(list(text_model=text_model, idf=idf))
   } else { # Create a test set, and classify test items
-    dfm_test <- dfm_weight(dfm_test, weights = idf)
     # Use force=T to keep only features present in both training and test set
     pred <- predict(text_model, newdata = dfm_test, type = 'class', force = T)
 
diff --git a/R/preproc.R b/R/preproc.R
new file mode 100644
index 0000000..c497bff
--- /dev/null
+++ b/R/preproc.R
@@ -0,0 +1,42 @@
+#' Preprocess dfm data for use in modeling procedure
+#'
+#' Process dfm according to parameters provided in params
+#'
+#' @param dfm_train Training dfm
+#' @param dfm_test Testing dfm if applicable, otherwise NULL
+#' @param params Row from grid with parameter optimization
+#' @return List with dfm_train and dfm_test, processed according to parameters in params
+#' @export
+#' @examples
+#' preproc(dfm_train, dfm_test = NULL, params)
+#################################################################################################
+#################################### Preprocess data ############################################
+#################################################################################################
+preproc <- function(dfm_train, dfm_test = NULL, params) {
+  # Remove non-existing features from training dfm
+  dfm_train <- dfm_trim(dfm_train, min_termfreq = 1, min_docfreq = 0)
+  if (params$tfidf) {
+    idf <- docfreq(dfm_train, scheme = "inverse", base = 10, smoothing = 0, k = 0, threshold = 0)
+    dfm_train <- dfm_weight(dfm_train, weights = idf)
+    if (!is.null(dfm_test)) {
+      dfm_test <- dfm_weight(dfm_test, weights = idf)
+    }
+  } else {
+    idf <- NULL
+  }
+
+  if ("feat_percentiles" %in% colnames(params) && "feat_measures" %in% colnames(params)) {
+
+    # Keeping unique words that are important to one or more categories (see textstat_keyness and feat_select)
+    words <- unique(unlist(lapply(unique(docvars(dfm_train, params$class_type)),
+                                  feat_select,
+                                  dfm = dfm_train,
+                                  class_type = params$class_type,
+                                  percentile = params$feat_percentiles,
+                                  measure = params$feat_measures
+    )))
+    dfm_train <- dfm_keep(dfm_train, words, valuetype="fixed", verbose=F)
+  }
+
+  return(list(dfm_train = dfm_train, dfm_test = dfm_test, idf = idf))
+}
diff --git a/man/preproc.Rd b/man/preproc.Rd
new file mode 100644
index 0000000..ab30c92
--- /dev/null
+++ b/man/preproc.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/preproc.R
+\name{preproc}
+\alias{preproc}
+\title{Preprocess dfm data for use in modeling procedure}
+\usage{
+preproc(dfm_train, dfm_test = NULL, params)
+}
+\arguments{
+\item{dfm_train}{Training dfm}
+
+\item{dfm_test}{Testing dfm if applicable, otherwise NULL}
+
+\item{params}{Row from grid with parameter optimization}
+}
+\value{
+List with dfm_train and dfm_test, processed according to parameters in params
+}
+\description{
+Process dfm according to parameters provided in params
+}
+\examples{
+preproc(dfm_train, dfm_test = NULL, params)
+}