#' Select features using quanteda textstat_keyness
#'
#' Select features based on the textstat_keyness function and a percentile cutoff.
#' Percentiles are based on absolute values, i.e. both on words that are key and *not* key to the topic.
#'
#' @param topic The topic to determine keywords for
#' @param dfm The input dfm
#' @param class_type Name of the column in docvars containing the classification
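#' @param percentile Quantile cutoff between 0 and 1 (passed to \code{quantile()} as the probability); only words whose absolute keyness is strictly above this quantile are returned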
#' @param measure Measure to use in determining keyness, default = chi2; see textstat_keyness for other options
#' @return A vector of words that are key to the topic
#' @export
#' @examples
#' \dontrun{
#' feat_select(topic, dfm, class_type, percentile, measure="chi2")
#' }
#################################################################################################
#################################### Feature selection ##########################################
#################################################################################################

feat_select <- function(topic, dfm, class_type, percentile, measure = "chi2") {
  # Select the features whose absolute keyness for `topic` lies above the
  # `percentile` quantile of all absolute keyness values.
  #
  # Only base R plus quanteda's textstat_keyness()/docvars() are used here, so
  # the function no longer depends on dplyr/magrittr being attached (a bare
  # `filter()` silently resolves to stats::filter() when dplyr is not loaded).

  # Target documents are those whose `class_type` docvar equals the topic
  # (the topic is coerced to numeric before the comparison).
  is_target <- docvars(dfm, class_type) == as.numeric(topic)

  # Score every feature against the target documents and drop rows that
  # contain missing values.
  keyness <- na.omit(
    textstat_keyness(dfm, measure = measure, target = is_target)
  )

  # The second column holds the keyness statistic (the first is `feature`).
  # Absolute values are used so that both strongly positive and strongly
  # negative features count as extreme. `[[` always yields a plain vector.
  score <- abs(keyness[[2]])

  # Keep the features whose score is strictly greater than the cutoff.
  cutoff <- quantile(score, percentile)
  keyness$feature[which(score > cutoff)]
}