dfm_gen: removed exceptions for NO summer codes

modelizer: created exception for outer_folds = 1
query_string: added parameter for default_operator
master
Erik de Vries 6 years ago
parent fbd525dc2e
commit e199b23227

@ -31,18 +31,13 @@ dfm_gen <- function(out, words = '999', text = "lemmas", clean, cores = detectCo
}
if ('_source.codes.majorTopic' %in% colnames(out)) {
out <- out %>%
mutate(codes = case_when(
.$`_source.codes.timeSpent` == -1 ~ NA_character_,
TRUE ~ .$`_source.codes.majorTopic`
)
) %>%
mutate(junk = case_when(
.$codes == 2301 ~ 1,
.$codes == 3101 ~ 1,
.$codes == 34 ~ 1,
.$`_source.codes.timeSpent` == -1 ~ NA_real_,
TRUE ~ 0
)
.$codes == 2301 ~ 1,
.$codes == 3101 ~ 1,
.$codes == 34 ~ 1,
.$`_source.codes.timeSpent` == -1 ~ NA_real_,
TRUE ~ 0
)
) %>%
mutate(aggregate = .$codes %>%
str_pad(4, side="right", pad="a") %>%

@ -256,8 +256,15 @@ modelizer <- function(dfm, cores_outer, cores_grid, cores_inner, cores_feats, se
}
}
## Generate nested CV folds, based on number of inner and outer folds defined (see start of script)
folds <- generate_folds(outer_k,inner_k = inner_k, dfm = dfm, class_type = class_type)
### If outer_k is 1, do a holdout training run, with only cross-validation for parameter optimization, else, do nested CV
### If holdout, training/test distribution is the same as for inner CV
if (outer_k == 1) {
outer_fold <- createDataPartition(as.factor(docvars(dfm, class_type)), p=1-(1/inner_k)*(inner_k-1))
folds <- lapply(outer_fold,inner_loop, dfm = dfm, inner_k = inner_k, class_type = class_type)
} else {
## Generate nested CV folds, based on number of inner and outer folds defined (see start of script)
folds <- generate_folds(outer_k,inner_k = inner_k, dfm = dfm, class_type = class_type)
}
## Get performance of each outer fold validation, and add row with mean scores (This is the final performance indicator)
performance <- mclapply(folds, outer_cv, grid=grid, dfm=dfm, class_type=class_type, model=model, cores_grid=cores_grid, cores_inner=cores_inner, cores_feats=cores_feats, mc.cores = cores_outer)

@ -12,7 +12,7 @@
#################################### Get data from ElasticSearch ################################
#################################################################################################
query_string <- function(query, fields = F, random = F) {
query_string <- function(query, fields = F, random = F, default_operator = "AND") {
if (typeof(fields) == 'logical') {
fields <- '*'
}
@ -28,7 +28,7 @@ query_string <- function(query, fields = F, random = F) {
"query_string" : {
"default_field" : "text",
"query" : "',query,'",
"default_operator": "AND",
"default_operator": "',default_operator,'",
"allow_leading_wildcard" : false
}
}]
@ -50,7 +50,7 @@ query_string <- function(query, fields = F, random = F) {
"query_string" : {
"default_field" : "text",
"query" : "',query,'",
"default_operator": "AND",
"default_operator": "',default_operator,'",
"allow_leading_wildcard" : false
}
}]

Loading…
Cancel
Save