make participant id and event id colname checks not case sensitive and stop making all metadata factors
Reilly-ConceptsCognitionLab · Oct 21, 2023 · aca2619 · aca2619
1 parent 17de2e5
commit aca2619
Showing 1 changed file with 8 additions and 4 deletions.
diff --git a/R/align_dyads.R b/R/align_dyads.R
@@ -74,7 +74,7 @@ align_dyads <- function(clean_ts_df) {
   #DEFINE THE METADATA ALIGN FUNCTION
   align_metadata <- function(aligned_ts_df) {
     #allow user to input the file path to demographic data, randomly assign groups, or not align groups
-    ask_meta_filepath <- readline(writeLines("If you would like to align metadata by interlocutor and event ID, input the absolute or relative file path to the metadata csv file.\nThe file path should not be in quotes (e.g. my_data/metadata.csv)\nThe csv file must contain column names 'Participant_ID' and 'event_id' (case sensitive) or it will not align\nIf you do not wish to align metadata press enter \nEnter 'random' to randomly assign a variable to each interlocutor in each dyad."))
+    ask_meta_filepath <- readline(writeLines("If you would like to align metadata by interlocutor and event ID, input the absolute or relative file path to the metadata csv file.\nThe file path should not be in quotes (e.g. my_data/metadata.csv)\nThe csv file must contain column names 'Participant_ID' and 'event_id' or it will not align\nIf you do not wish to align metadata press enter \nEnter 'random' to randomly assign a variable to each interlocutor in each dyad."))
     #if user inputs 'random', randomly assigns groups across transcripts
     if (str_to_lower(ask_meta_filepath) == "random") {
       randomly <- lapply(split(aligned_ts_df, aligned_ts_df$event_id), function(x){ #iterates over each doc
@@ -102,21 +102,25 @@ align_dyads <- function(clean_ts_df) {
       #allows the user to specify which columns they want to subset
       subset_metadata <- select.list(c(colnames(metadata), "Select all columns"),
                                      preselect = NULL, multiple = TRUE,
-                                     title = "Select the columns you would like to subset. The 'Participant_ID' and 'event_ID' (case sensitive) columns must be included.",
+                                     title = "Select the columns you would like to subset. The 'Participant_ID' and 'event_ID' column must be included.",
                                      graphics = FALSE)
       #if the select all option is chosen, selects every column
       if (any(grepl("Select all columns", subset_metadata)) == TRUE) {
         subset_metadata <- colnames(metadata)
       }
       metadata_selected <- metadata[,colnames(metadata) %in% subset_metadata] #select specified columns
+
+      #check for event and participant column names and replace with correct cases if incorrect
+      colnames(metadata_selected)[grep("^event_id$", colnames(metadata_selected), ignore.case = T)] <- "event_id"
+      colnames(metadata_selected)[grep("^Participant_ID$", colnames(metadata_selected), ignore.case = T)] <- "Participant_ID"
+
       #select dimensions that aren't used to align on
       meta_dims <- which(!colnames(metadata_selected) %in% c("event_id", "Participant_ID"))
-      #make all dimensions that aren't alingers factors
-      metadata_selected[,meta_dims] <- lapply(metadata_selected[,meta_dims], factor)
 
       #join metadata to aligned data frame by event id and PID
       metadata_aligned_df <- dplyr::left_join(aligned_ts_df, metadata_selected,
                                               by=c("event_id", "Participant_ID"))
+
       return(metadata_aligned_df)
     }
   }