modelizer: fixed error when only one class is predicted for junk classification (borderline case)

master
Erik de Vries 6 years ago
parent a13d86b92d
commit a5ba00146f

@ -73,9 +73,9 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
mc.cores = cores_inner mc.cores = cores_inner
) )
) )
print(res) # print(res)
print(res[1,1]) # print(res[1,1])
print('inner_cv') # print('inner_cv')
return(cbind(as.data.frame(t(colMeans(select(res, 1:`Balanced Accuracy`)))),params)) return(cbind(as.data.frame(t(colMeans(select(res, 1:`Balanced Accuracy`)))),params))
} }
@ -98,9 +98,9 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
cores_inner = cores_inner, cores_inner = cores_inner,
mc.cores = cores_grid) mc.cores = cores_grid)
) )
print(res) # print(res)
print(res[1,1]) # print(res[1,1])
print('outer_cv') # print('outer_cv')
# Determine optimum hyperparameters within outer fold training set # Determine optimum hyperparameters within outer fold training set
optimum <- res[which.max(res[,opt_measure]),] %>% optimum <- res[which.max(res[,opt_measure]),] %>%
select(percentiles: ncol(.)) select(percentiles: ncol(.))
@ -122,9 +122,9 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
cores_inner = cores_inner, cores_inner = cores_inner,
mc.cores = cores_grid) mc.cores = cores_grid)
) )
print(res) # print(res)
print(res[1,1]) # print(res[1,1])
print('line 126, final model parameter optimization') # print('line 126, final model parameter optimization')
return(res) return(res)
} }
} }
@ -224,6 +224,16 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
dfm_test <- dfm_keep(dfm_test, pattern = dfm_train, valuetype = "fixed", verbose = T) dfm_test <- dfm_keep(dfm_test, pattern = dfm_train, valuetype = "fixed", verbose = T)
dfm_test <- custom_tfidf(dfm_test, scheme_tf = scheme_tf, scheme_df = "inverse", base = 10, dfreq = dfreq[words]) dfm_test <- custom_tfidf(dfm_test, scheme_tf = scheme_tf, scheme_df = "inverse", base = 10, dfreq = dfreq[words])
pred <- predict(text_model, newdata = dfm_test, type = 'class') pred <- predict(text_model, newdata = dfm_test, type = 'class')
### Fix for single-class 'predictions' in borderline situations
if (length(unique(pred)) == 1 & class_type == 'junk') {
if (unique(pred) == '0') {
pred[1] <- '1'
} else {
pred[1] <- '0'
}
}
class_table <- table(prediction = pred, trueValues = docvars(dfm_test, class_type)) class_table <- table(prediction = pred, trueValues = docvars(dfm_test, class_type))
conf_mat <- confusionMatrix(class_table, mode = "everything") conf_mat <- confusionMatrix(class_table, mode = "everything")
if (is.matrix(conf_mat$byClass) == T) { if (is.matrix(conf_mat$byClass) == T) {

Loading…
Cancel
Save