diff --git a/DESCRIPTION b/DESCRIPTION
index e829923..b1679d8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -9,6 +9,7 @@ Depends: R (>= 3.3.1),
     parallel,
     tidyverse,
     quanteda,
+    quanteda.textmodels,
     httr,
     caret,
     e1071,
diff --git a/R/class_update.R b/R/class_update.R
index a33914b..177c6b5 100644
--- a/R/class_update.R
+++ b/R/class_update.R
@@ -18,11 +18,11 @@
 #################################################################################################
 #################################### Update any kind of classification ##########################
 #################################################################################################
-class_update <- function(out, localhost = T, model_final, dfm_words, varname, text, words, clean, ver, es_super = .rs.askForPassword('ElasticSearch WRITE')) {
+class_update <- function(out, localhost = T, model_final, varname, text, words, clean, ver, es_super = .rs.askForPassword('ElasticSearch WRITE')) {
   print('updating')
   dfm <- dfm_gen(out, text = text, words = words, clean = clean) %>%
-    dfm_keep(dfm_words, valuetype="fixed", verbose=T)
-  pred <- data.frame(id = out$`_id`, pred = predict(model_final, newdata = dfm))
+    dfm_weight(weights = model_final$idf)
+  pred <- data.frame(id = out$`_id`, pred = predict(model_final$text_model, newdata = dfm, type = "class", force = T))
   bulk <- apply(pred, 1, bulk_writer, varname = varname, type = 'set', ver = ver)
   res <- elastic_update(bulk, es_super = es_super, localhost = localhost)
 }
diff --git a/R/dfm_gen.R b/R/dfm_gen.R
index 9f98507..f645f32 100644
--- a/R/dfm_gen.R
+++ b/R/dfm_gen.R
@@ -36,6 +36,9 @@ dfm_gen <- function(out, words = '999', text = "lemmas", clean, cores = 1) {
           .$codes == 92 ~ 1,
           .$codes == 91 ~ 1,
           .$codes == 93 ~ 1,
+          .$codes == 2301 ~ 1,
+          .$codes == 3101 ~ 1,
+          .$codes == 34 ~ 1,
           TRUE ~ 0
         )
       ) %>%
@@ -59,6 +62,6 @@ dfm_gen <- function(out, words = '999', text = "lemmas", clean, cores = 1) {
     }
   }
   dfm <- corpus(out$merged, docnames = out$`_id`, docvars = vardoc) %>%
-    dfm(tolower = T, stem = F, remove_punct = T, valuetype = "regex", ngrams = 1)
+    dfm(tolower = T, stem = F, remove_punct = T, valuetype = "regex")
   return(dfm)
 }
diff --git a/R/modelizer.R b/R/modelizer.R
index b6df864..a83e698 100644
--- a/R/modelizer.R
+++ b/R/modelizer.R
@@ -31,7 +31,7 @@
 modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, seed, outer_k, inner_k, model, class_type, opt_measure, country, grid) {
   ### Functions ###
   feat_select <- function (topic, dfm, class_type, percentile,measure) {
-    keyness <- textstat_keyness(dfm, measure = measure, docvars(dfm, class_type) == as.numeric(topic)) %>%
+    keyness <- textstat_keyness(dfm, measure = measure, target = docvars(dfm, class_type) == as.numeric(topic)) %>%
       na.omit()
     keyness[,2] <- abs(keyness[,2])
     keyness <- filter(keyness, keyness[,2] > quantile(as.matrix(keyness[,2]),percentile))$feature
@@ -46,7 +46,7 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
     # if (inner_k <= 1) {
     #   inner_folds <- createDataPartition(as.factor(docvars(dfm[-fold], class_type)), times = 1, p = 1-0.8)
     # } else {
-      inner_folds <- createFolds(as.factor(docvars(dfm[-fold], class_type)), k= inner_k)
+      inner_folds <- createFolds(as.factor(docvars(dfm[-fold,], class_type)), k= inner_k)
     # }
     return(c(outer_fold = list(fold),inner_folds))
   }
@@ -60,6 +60,7 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se

   ### Gets called for every parameter combination, and calls classifier for every inner cv fold
   inner_cv <- function(row,grid,outer_fold, inner_folds, dfm, class_type, model, cores_inner, cores_feats) {
+    print(str_c('params ',row))
     params <- grid[row,]
     # For each inner fold, cross validate the specified parameters
     res <-
@@ -82,6 +83,7 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se

   ### Gets called for every outer cv fold, and calls inner_cv for all parameter combinations in grid
   outer_cv <- function(fold, grid, dfm, class_type, model, cores_grid, cores_inner, cores_feats) {
+    print('outer cv')
     # If fold contains both inner folds and outer fold
     if (length(fold) == inner_k + 1) {
       inner_folds <- fold[-1]
@@ -130,39 +132,42 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
     }
   }

-  ### Custom tfidf function to allow same idf for different dfm's
-  custom_tfidf <- function(x, scheme_tf = "count", scheme_df = "inverse", base = 10, dfreq = dfreq) {
-    if (!nfeat(x) || !ndoc(x)) return(x)
-    tfreq <- dfm_weight(x, scheme = scheme_tf, base = base)
-    if (nfeat(x) != length(dfreq))
-      stop("missing some values in idf calculation")
-    # get the document indexes
-    j <- as(tfreq, "dgTMatrix")@j + 1
-    # replace just the non-zero values by product with idf
-    x@x <- tfreq@x * dfreq[j]
-    # record attributes
-    x@weightTf <- tfreq@weightTf
-    x@weightDf <- c(list(scheme = scheme_df, base = base), args)
-    return(x)
-  }
+  # ### Custom tfidf function to allow same idf for different dfm's
+  # custom_tfidf <- function(x, scheme_tf = "count", scheme_df = "inverse", base = 10, dfreq = dfreq) {
+  #   if (!nfeat(x) || !ndoc(x)) return(x)
+  #   tfreq <- dfm_weight(x, scheme = scheme_tf, base = base)
+  #   if (nfeat(x) != length(dfreq))
+  #     stop("missing some values in idf calculation")
+  #   # get the document indexes
+  #   j <- as(tfreq, "dgTMatrix")@j + 1
+  #   # replace just the non-zero values by product with idf
+  #   x@x <- tfreq@x * dfreq[j]
+  #   # record attributes
+  #
+  #   ### Not setting weighting parameters in dfm to avoid "grouping after weighting" errors that occur since quanteda 1.4.2
+  #
+  #   # x@weightTf <- tfreq@weightTf
+  #   # x@weightDf <- c(list(scheme = scheme_df, base = base), args)
+  #   return(x)
+  # }

   ### Classification function
   classifier <- function (inner_fold, outer_fold, params, dfm, class_type, model, cores_feats) {
     # If both inner and outer folds, subset dfm to outer_fold training set, then create train and test sets according to inner fold. Evaluate performance
     if (length(inner_fold) > 0 && length(outer_fold) > 0) {
-      dfm_train <- dfm[-outer_fold] %>%
-        .[-inner_fold]
-      dfm_test <- dfm[-outer_fold] %>%
-        .[inner_fold]
+      dfm_train <- dfm[-outer_fold,] %>%
+        .[-inner_fold,]
+      dfm_test <- dfm[-outer_fold,] %>%
+        .[inner_fold,]
     # If only outer folds, but no inner folds, validate performance of outer fold training data on outer fold test data
     } else if (length(outer_fold) > 0 ) {
-      dfm_train <- dfm[-outer_fold]
-      dfm_test <- dfm[outer_fold]
+      dfm_train <- dfm[-outer_fold,]
+      dfm_test <- dfm[outer_fold,]
       validation_cv <- T
     # If only inner folds, validate performance directly on inner folds (is the same as above?)
     } else if (length(inner_fold) > 0 ) {
-      dfm_train <- dfm[-inner_fold]
-      dfm_test <- dfm[inner_fold]
+      dfm_train <- dfm[-inner_fold,]
+      dfm_test <- dfm[inner_fold,]
     # If both inner and outer folds are NULL, set training set to whole dataset, estimate model and return final model
     } else {
       final <- T ### Indicate final modeling run on whole dataset
@@ -176,8 +181,8 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
     ### Getting features from training dataset
     # Getting idf from training data, and using it to normalize both training and testing feature occurence
     dfm_train <- dfm_trim(dfm_train, min_termfreq = 1, min_docfreq = 0)
-    dfreq <- docfreq(dfm_train, scheme = "inverse", base = 10, smoothing = 0, k = 0, threshold = 0, use.names=T)
-    dfm_train <- custom_tfidf(dfm_train, scheme_tf = scheme_tf, scheme_df = "inverse", base = 10, dfreq = dfreq)
+    idf <- docfreq(dfm_train, scheme = "inverse", base = 10, smoothing = 0, k = 0, threshold = 0)
+    dfm_train <- dfm_weight(dfm_train, weights = idf)
     # Added unique to filter out duplicate words, these are caused when there are multiple categories, and a words scores higher
     # than the threshold on two or more of those categories
     words <- unique(unlist(mclapply(unique(docvars(dfm_train, class_type)),
@@ -188,6 +193,7 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
                                     measure = params$measures,
                                     mc.cores = cores_feats
     )))
+    # dfm_train <- custom_tfidf(dfm_train, scheme_tf = scheme_tf, scheme_df = "inverse", base = 10, dfreq = dfreq)
     dfm_train <- dfm_keep(dfm_train, words, valuetype="fixed", verbose=T)


@@ -220,20 +226,12 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
     # }
     ### Add more if statements for different models
     if (exists("final")) {
-      return(text_model)
+      return(list(text_model=text_model, idf=idf))
     } else {
       ### Removing all features not in training set from test set and weighting the remaining features according to training idf
       dfm_test <- dfm_keep(dfm_test, pattern = dfm_train, valuetype = "fixed", verbose = T)
-      dfm_test <- custom_tfidf(dfm_test, scheme_tf = scheme_tf, scheme_df = "inverse", base = 10, dfreq = dfreq[words])
+      dfm_test <- dfm_weight(dfm_test, weights = idf)
       pred <- predict(text_model, newdata = dfm_test, type = 'class')
-      if (exists("validation_cv")) {
-        return(data.frame(
-          tv = docvars(dfm_test, class_type),
-          pred = pred,
-          params = params,
-          stringsAsFactors = F
-        ))
-      }
       ### Fix for single-class 'predictions' in borderline situations
       # if (length(unique(pred)) == 1 & class_type == 'junk') {
       #   if (unique(pred) == '0') {
@@ -247,7 +245,21 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se

       u <- union(pred, docvars(dfm_test, class_type))
       class_table <- table(prediction = factor(pred, u), trueValues = factor(docvars(dfm_test, class_type), u))
-      conf_mat <- confusionMatrix(class_table, mode = "everything")
+      if (length(unique(u)) == 2) {
+        conf_mat <- confusionMatrix(class_table, mode = "everything", positive = max(u))
+      } else {
+        conf_mat <- confusionMatrix(class_table, mode = "everything")
+        conf_mat$positive <- NA
+      }
+      if (exists("validation_cv")) {
+        return(data.frame(
+          tv = docvars(dfm_test, class_type),
+          pred = pred,
+          params = params,
+          pos_cat = conf_mat$positive,
+          stringsAsFactors = F
+        ))
+      }
       if (is.matrix(conf_mat$byClass) == T) {
         return(cbind(as.data.frame(t(conf_mat$overall)),as.data.frame(t(colMeans(conf_mat$byClass))),params))
       } else {
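
Note on the new model contract: modelizer() now returns list(text_model = ..., idf = ...) instead of a bare text model, and class_update() re-applies the stored training idf to incoming documents via dfm_weight(weights = ...) before predicting. The following is a minimal sketch of that round trip, assuming a naive Bayes model from quanteda.textmodels for illustration; the toy documents and labels are hypothetical, not from this repository:

library(quanteda)
library(quanteda.textmodels)

# Train: compute idf on the training dfm only, then weight the counts with it
train <- dfm(tokens(c(d1 = "economy market trade", d2 = "sports match goal")))
idf   <- docfreq(train, scheme = "inverse", base = 10, smoothing = 0, k = 0, threshold = 0)
train <- dfm_weight(train, weights = idf)
model_final <- list(text_model = textmodel_nb(train, y = factor(c(1, 0))), idf = idf)

# Predict: weight new documents with the *training* idf so both dfms share one scale;
# features with no stored idf keep their raw counts, and force = TRUE makes predict()
# align the new dfm's features with those the model was fitted on
new_docs <- dfm(tokens(c(n1 = "the economy and trade")))
new_docs <- dfm_weight(new_docs, weights = model_final$idf)
pred <- predict(model_final$text_model, newdata = new_docs, type = "class", force = TRUE)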
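
The confusionMatrix change pins the positive class for binary problems, so precision, recall and F1 refer to a fixed category rather than whichever factor level happens to come first. A small self-contained illustration of that branch, with made-up predictions and labels (caret is the only requirement):

library(caret)

pred <- c("1", "0", "1", "1")  # hypothetical predictions
true <- c("1", "0", "0", "1")  # hypothetical true values
u <- union(pred, true)
class_table <- table(prediction = factor(pred, u), trueValues = factor(true, u))

if (length(unique(u)) == 2) {
  # binary case: make the lexically larger label ("1" here) the positive class, as in the patch
  conf_mat <- confusionMatrix(class_table, mode = "everything", positive = max(u))
} else {
  # multiclass case: caret reports per-class statistics, so no single positive class applies
  conf_mat <- confusionMatrix(class_table, mode = "everything")
  conf_mat$positive <- NA
}
conf_mat$byClass["F1"]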