Skip to content

Commit

Permalink
turn character vectors into factors
Browse files Browse the repository at this point in the history
  • Loading branch information
sbfnk committed May 29, 2024
1 parent f2c3c22 commit 5af385b
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 5 deletions.
6 changes: 5 additions & 1 deletion R/bi_read.R
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,11 @@ bi_read <- function(x, vars, dims, model, type, file, missval_threshold,
## strip trailing numbers, these indicate duplicate dimensions
dim_col <- sub("\\.[0-9]+$", "", col)
if (!missing(dims) && !is.null(dims) && dim_col %in% names(dims)) {
mav[[col]] <- factor(mav[[col]], labels = dims[[dim_col]])
mav[[col]] <- factor(
mav[[col]] - 1L,
levels = seq_along(dims[[dim_col]]) - 1L,
labels = dims[[dim_col]]
)
} else if (dim_col %in% var_dims[["other"]][[var_name]]) {
mav[[col]] <- mav[[col]] - 1
}
Expand Down
66 changes: 62 additions & 4 deletions R/bi_write.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#' @return A list of the time and coord dims, and factors in extra dimensions,
#' if any
#' @importFrom ncdf4 nc_close ncdim_def ncvar_def nc_create ncvar_put ncvar_add
#' @importFrom data.table data.table copy
#' @importFrom data.table data.table copy rbindlist
#' @importFrom reshape2 melt
#' @examples
#' filename <- tempfile(pattern = "dummy", fileext = ".nc")
Expand Down Expand Up @@ -242,15 +242,19 @@ bi_write <- function(filename, variables, append = FALSE, overwrite = FALSE,
dim_name <- col
## strip trailing numbers, these indicate duplicate dimensions
dim_name <- sub("\\.[0-9]+$", "", dim_name)
dim_values <- unique(element[[col]])
if (is.factor(element[[col]])) {
dim_values <- levels(element[[col]])
} else {
dim_values <- unique(element[[col]])
}
if (dim_name %in% names(dims)) {
if (length(dim_values) != dims[[dim_name]]$len) {
stop(
"Two dimensions of name '", dim_name, "' have different lengths"
)
}
} else {
new_dim <- ncdim_def(dim_name, "", seq_along(unique(dim_values)) - 1)
new_dim <- ncdim_def(dim_name, "", seq_along(dim_values) - 1)
dims[[dim_name]] <- new_dim
if (!(class(dim_values) %in% c("numeric", "integer") &&
length(setdiff(as.integer(dim_values), dim_values)) == 0 &&
Expand Down Expand Up @@ -357,6 +361,7 @@ check_sparse_var <- function(x, coord_cols, value_column) {
setorderv(check, coord_cols)

all_values <- lapply(coord_cols, function(x) unique(check[[x]]))
names(all_values) <- coord_cols
all_combinations <- do.call(CJ, all_values)

## check if for all combinations of other calls the values of coord_cols
Expand All @@ -366,7 +371,12 @@ check_sparse_var <- function(x, coord_cols, value_column) {
all(.SD[, coord_cols, with = FALSE] == all_combinations)
), by = other_cols]

return(any(!all[["all_equal"]]))
## all_factors
all_factors <- vapply(coord_cols, function(x) {
length(setdiff(levels(all_values[[x]]), all_values[[x]])) == 0
}, logical(1))

return(any(!all[["all_equal"]]) || any(!all_factors))
}

##' Create a coordinate variable
Expand Down Expand Up @@ -449,3 +459,51 @@ create_coord_var <- function(name, dims, dim_factors, coord_dim, index_table,
dim = coord_index_dim, dim_factors = dim_factors
))
}

##' Get the factor levels of all character columns in data
##'
##' @param ... variable lists
##' @return a list with elements that represent the factor levels present in
##' character columns
##' @author Sebastian Funk
get_char_levels <- function(...) {
levels <- list()
for (variables in list(...)) {
## convert character strings to factors
data_frames <- names(variables)[
vapply(variables, is.data.frame, logical(1))
]
if (length(data_frames) > 0) {
common <- rbindlist(variables[data_frames], fill = TRUE)
char_cols <- colnames(common)[vapply(common, is.character, logical(1))]
for (col in char_cols) {
levels[[col]] <- union(levels[[col]], unique(na.omit(common[[col]])))
}
}
}
return(levels)
}

##' Convert character columns to factors in data
##'
##' @param levels factor levels, as a named list, each representing one column
##' @inheritParams bi_write
##' @return the \code{variables} argument with factorised columns
##' @author Sebastian Funk
factorise <- function(variables, levels) {
data_frames <- names(variables)[
vapply(variables, is.data.frame, logical(1))
]
if (length(data_frames) > 0) {
for (col in names(levels)) {
## convert character strings to factors
variables[data_frames] <- lapply(variables[data_frames], function(df) {
if (col %in% colnames(df)) {
df[[col]] <- factor(df[[col]], levels = levels[[col]])
}
return(df)
})
}
}
return(variables)
}
5 changes: 5 additions & 0 deletions R/libbi.R
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,11 @@ run.libbi <- function(x, client, proposal = c("model", "prior"), model, fix,
file_args <- intersect(names(args), file_types)
## assign file args to global options
for (arg in file_args) x$options[[arg]] <- get(arg)
list_args <- file_args[vapply(x$options[file_args], is.list, logical(1))]
if (length(list_args) > 0) {
levels <- do.call(get_char_levels, x$options[list_args])
x$options[list_args] <- lapply(x$options[list_args], factorise, levels)
}

if (x$run_flag && length(x$output_file_name) == 1 &&
file.exists(x$output_file_name)) {
Expand Down

0 comments on commit 5af385b

Please sign in to comment.