update help file documentation

Reilly-ConceptsCognitionLab · Dec 4, 2023 · d40ecad · d40ecad
1 parent 0f92783
commit d40ecad
Show file tree

Hide file tree

Showing 10 changed files with 13 additions and 70 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
 export(align_dyads)
-export(align_metadata)
 export(clean_dyads)
 export(read_dyads)
 export(summarize_dyads)

diff --git a/R/align_dyads.R b/R/align_dyads.R
@@ -1,9 +1,9 @@
 #' align_dyads
 #'
-#' Yokes user-specified semantic, affective, and phonological values to each word in a cleaned language transcript. Prepares a dataframe aligned by exchange and turn across Participant_IDs.
+#' Yokes user-specified semantic, affective, and phonological values to each word in a cleaned language transcript. Values are aligned by each individual word, and words that are not present in the database are dropped. The number of words dropped is reported by interlocutor in each dyad. Reports an exchange count, which counts by each pair of turns.
 #'
 #' @name align_dyads
-#' @param clean_ts_df a dataframe cleaned and formatted during the read_dyads() function
+#' @param clean_ts_df a dataframe cleaned and formatted during the clean_dyads() function
 #' @return a dataframe one-word-per-row format with variables of interest appended
 #' @importFrom magrittr %>%
 #' @importFrom tidyselect any_of

diff --git a/R/align_metadata.R b/R/align_metadata.R
diff --git a/R/clean_dyads.R b/R/clean_dyads.R
@@ -1,6 +1,6 @@
 #' clean_dyads
 #'
-#' Cleans and Formats raw language transcripts, removing stopwords and formatting dataframe for alignment steps
+#' Cleans and formats language transcripts from the read stage. Removes non-alphabetic characters and stopwords. Language transcripts can be lemmatized by setting lemmatize = TRUE. Vectorizes each utterance and reports the total word count and mean word length by interlocutor in each dyad. Also reports the number of words in each turn.
 #' @name clean_dyads
 #' @param read_ts_df dataframe produced from the read_dyads() function
 #' @return dataframe with stopwords omitted, lemmatized words one per row
@@ -24,14 +24,6 @@
 #' @export clean_dyads
 
 clean_dyads <- function(read_ts_df, lemmatize=TRUE) {
-  #specify a group of speaker names that should be automatically removed from the transcript
-  s_remove <- c("Unknown", "unknown", "Speaker", "speaker", "Other", "other", "E", "e", "Experimenter", "experimenter", "Assistant", "assistant")
-
-  #removes rows from the transcript that have the speaker as specified in the remove
-  if (any(read_ts_df$Participant_ID %in% s_remove) == TRUE){ #conditional in case no matches
-    read_ts_df <- read_ts_df[-which(read_ts_df$Participant_ID %in% s_remove),]
-  }
-
   #set event_id  and speaker names as factors
   read_ts_df$Participant_ID <- as.factor(read_ts_df$Participant_ID) #convert variables to factor
   read_ts_df$event_id <- as.factor(read_ts_df$event_id)

diff --git a/R/read_dyads.R b/R/read_dyads.R
@@ -1,10 +1,10 @@
 #' read_dyads
 #'
-#' Reads pre-formatted conversation transcripts from txt or csv on user's machine; user supplies directory path (e.g., "my_transcripts") to local folder as argument to function call
+#' Reads pre-formatted dyadic (2 interlocutor) conversation transcripts from your machine. Transcripts must be either csv or txt format. If you are supplying a txt file, your transcript must be formatted as an otter.ai txt file export. Your options for using csv files are more flexible. ConversationAlign minimally requires a csv file with two columns, denoting interlocutor and text. Each separate conversation transcript should be saved as a separate file. ConversationAlign will use the file names as a document ID. Within the read_dyads() function, set the folder_name argument as the directory path to the local folder containing your transcripts on your machine (e.g., "my_transcripts"). Please see our GitHub page for examples of properly formatted transcripts: https://github.com/Reilly-ConceptsCognitionLab/ConversationAlign
 #'
 #' @name read_dyads
 #' @param folder_name folder of conversation transcripts in csv or txt format
-#' @return a concatenated dataframe with each language transcript saved as a separate 'event_id'; these are split into separate lists for discrete operations in later steps
+#' @return a concatenated dataframe with each language transcript saved as a separate 'event_id'
 #' @importFrom magrittr %>%
 #' @importFrom dplyr select
 #' @importFrom dplyr bind_rows

diff --git a/R/summarize_dyads.R b/R/summarize_dyads.R
@@ -1,6 +1,6 @@
 #' summarize_dyads
 #'
-#' appends AUC and Spearman Rank Correlation indices to each dyad (event_id) using a resampling algoirthm that defaults to the minimum number of exchanges across all documents entered
+#' Calculates and appends 3 measures for quantifying alignment. Appends the mean score for each dimension by turn. Calculates Spearman's rank correlation between interlocutor time series and appends by transcript. Calculates the area under the curve of the absolute difference time series between interlocutor time series. The length of the difference time series can be standardized to the shortest number of exchanges present in the group using an internally defined resampling function, called with resample = TRUE. Spearman's rank correlation and area under the curve become less reliable for dyads under 30 exchanges.
 #'
 #' @name summarize_dyads
 #' @param dataframe produced in the align_dyads function

diff --git a/man/align_dyads.Rd b/man/align_dyads.Rd
diff --git a/man/clean_dyads.Rd b/man/clean_dyads.Rd
diff --git a/man/read_dyads.Rd b/man/read_dyads.Rd
diff --git a/man/summarize_dyads.Rd b/man/summarize_dyads.Rd