Skip to content

Commit

Permalink
Merge pull request #40 from fmicompbio/hgvs-names
Browse files Browse the repository at this point in the history
HGVS names
  • Loading branch information
mbstadler authored Mar 6, 2023
2 parents 4d566cc + 19db145 commit 2359969
Show file tree
Hide file tree
Showing 15 changed files with 539 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
config:
- { os: macOS-latest, bioc: 'release', curlConfigPath: '/usr/bin/'}
- { os: windows-latest, bioc: 'release'}
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_15", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_16", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mutscan
Title: Preprocessing and Analysis of Deep Mutational Scanning Data
Version: 0.2.34
Version: 0.2.35
Authors@R:
c(person(given = "Charlotte",
family = "Soneson",
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# mutscan 0.2.35

* Add alternative names for variants (including HGVS identifiers)

# mutscan 0.2.34

* Expand examples in function documentation

# mutscan 0.2.33

* Replace Matrix.utils::aggregate.Matrix (removed from CRAN) by DelayedArray::rowsum
Expand Down
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ translateString <- function(s) {
.Call(`_mutscan_translateString`, s)
}

# R-level entry point for the compiled routine `_mutscan_makeBaseHGVS`.
# The four arguments are handed to .Call() unchanged and in declaration
# order; the value returned by the compiled code is returned as-is.
makeBaseHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq, varSeq) {
    .Call(
        `_mutscan_makeBaseHGVS`,
        mutationsSorted,
        mutNameDelimiter,
        wtSeq,
        varSeq
    )
}

# R-level entry point for the compiled routine `_mutscan_test_makeAAHGVS`.
# The three arguments are handed to .Call() unchanged and in declaration
# order; the value returned by the compiled code is returned as-is.
test_makeAAHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq) {
    .Call(
        `_mutscan_test_makeAAHGVS`,
        mutationsSorted,
        mutNameDelimiter,
        wtSeq
    )
}

# R-level entry point for the compiled routine `_mutscan_test_decomposeRead`.
# All thirteen arguments are handed to .Call() unchanged and in declaration
# order; the value returned by the compiled code is returned as-is.
test_decomposeRead <- function(sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength) {
    .Call(
        `_mutscan_test_decomposeRead`,
        sseq,
        squal,
        elements,
        elementLengths,
        primerSeqs,
        umiSeq,
        varSeq,
        varQual,
        varLengths,
        constSeq,
        constQual,
        nNoPrimer,
        nReadWrongLength
    )
}
Expand Down
9 changes: 7 additions & 2 deletions R/collapseMutantsByAA.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,13 @@ collapseMutantsByAA <- function(se, nameCol = "mutantNameAA") {
rd <- mergeValues(SummarizedExperiment::rowData(se)[[nameCol]],
SummarizedExperiment::rowData(se)$sequence) %>%
stats::setNames(c(nameCol, "sequence"))
for (v in c("mutantName", "sequenceAA", "mutationTypes",
"nbrMutBases", "nbrMutCodons", "nbrMutAAs")) {
for (v in setdiff(
intersect(c("mutantName", "mutantNameBase", "mutantNameBaseHGVS",
"mutantNameCodon", "mutantNameAA", "mutantNameAAHGVS",
"sequenceAA", "mutationTypes",
"nbrMutBases", "nbrMutCodons", "nbrMutAAs"),
colnames(SummarizedExperiment::rowData(se))),
nameCol)) {
tmp <- mergeValues(SummarizedExperiment::rowData(se)[[nameCol]],
SummarizedExperiment::rowData(se)[[v]])
rd[[v]] <- tmp$valueColl[match(rd[[nameCol]], tmp$mutantNameColl)]
Expand Down
14 changes: 11 additions & 3 deletions R/digestFastqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,18 @@
#' \item{summaryTable}{A \code{data.frame} that contains, for each observed
#' mutation combination, the corresponding variable region sequences (or pair of
#' sequences), the number of observed such sequences, and the number of unique
#' UMIs observed for the sequence. It also has a column named 'maxNbrReads',
#' which contains the number of reads for the most frequent observed sequence
#' UMIs observed for the sequence. It also has additional columns: 'maxNbrReads'
#' contains the number of reads for the most frequent observed sequence
#' represented by the feature (only relevant if similar variable regions are
#' collapsed).}
#' collapsed). 'nbrMutBases', 'nbrMutCodons' and 'nbrMutAAs' give the number of
#' mutated bases, codons or amino acids in each variant. Alternative variant
#' names based on base, codon or amino acid sequence are provided in columns
#' 'mutantNameBase', 'mutantNameCodon', 'mutantNameAA'. In addition,
#' 'mutantNameBaseHGVS' and 'mutantNameAAHGVS' give base- and amino acid-based
#' names following the HGVS nomenclature (\url{https://varnomen.hgvs.org/}). Please
#' note that the provided reference sequence names are used for the HGVS
#' sequence identifiers. It is up to the user to use appropriately named
#' reference sequences in order to obtain valid HGVS variant names.}
#' \item{filterSummary}{A \code{data.frame} that contains the number of input
#' reads, the number of reads filtered out in the processing, and the number of
#' retained reads. The filters are named according to the convention
Expand Down
8 changes: 6 additions & 2 deletions R/summarizeExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,12 @@ summarizeExperiment <- function(x, coldata, countType = "umis") {
## Also here, each column can contain multiple values separated with ,
## (e.g. if variable sequences were collapsed to WT in digestFastqs)
## ------------------------------------------------------------------------
for (v in c("nbrMutBases", "nbrMutCodons", "nbrMutAAs",
"sequenceAA", "mutantNameAA", "mutationTypes", "varLengths")) {
for (v in intersect(c("nbrMutBases", "nbrMutCodons", "nbrMutAAs",
"mutantNameBase", "mutantNameBaseHGVS",
"mutantNameCodon", "mutantNameAA", "mutantNameAAHGVS",
"sequenceAA", "mutationTypes",
"varLengths"),
colnames(tmpdf))) {
tmp <- mergeValues(tmpdf$mutantName, tmpdf[[v]]) %>%
stats::setNames(c("mutantName", v))
allSequences[[v]] <- tmp[[v]][match(allSequences$mutantName,
Expand Down
Binary file modified inst/extdata/GSE102901_cis_se.rds
Binary file not shown.
14 changes: 11 additions & 3 deletions man/digestFastqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 31 additions & 2 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,43 @@ BEGIN_RCPP
END_RCPP
}
// translateString
std::string translateString(std::string& s);
std::string translateString(const std::string& s);
RcppExport SEXP _mutscan_translateString(SEXP sSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type s(sSEXP);
Rcpp::traits::input_parameter< const std::string& >::type s(sSEXP);
rcpp_result_gen = Rcpp::wrap(translateString(s));
return rcpp_result_gen;
END_RCPP
}
// makeBaseHGVS
// Glue between R's .Call() interface and the C++ function makeBaseHGVS().
// NOTE(review): this looks like Rcpp-generated export code; if so, changes
// belong in the C++ source signature, not here -- confirm before editing.
//
// Forward declaration of the implementation (defined in another translation
// unit). All four parameters are declared by const value, not by reference.
std::string makeBaseHGVS(const std::vector<std::string> mutationsSorted, const std::string mutNameDelimiter, const std::string wtSeq, const std::string varSeq);
// Exported entry point: takes one SEXP per argument and returns a SEXP.
RcppExport SEXP _mutscan_makeBaseHGVS(SEXP mutationsSortedSEXP, SEXP mutNameDelimiterSEXP, SEXP wtSeqSEXP, SEXP varSeqSEXP) {
// BEGIN_RCPP / END_RCPP bracket the body; per the Rcpp documentation they
// turn C++ exceptions into R errors instead of letting them escape.
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject rcpp_result_gen;
// Scope guard emitted by Rcpp for exported functions (RNG state handling
// per the Rcpp attributes documentation).
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert each incoming SEXP to the C++ type named in the declaration above.
Rcpp::traits::input_parameter< const std::vector<std::string> >::type mutationsSorted(mutationsSortedSEXP);
Rcpp::traits::input_parameter< const std::string >::type mutNameDelimiter(mutNameDelimiterSEXP);
Rcpp::traits::input_parameter< const std::string >::type wtSeq(wtSeqSEXP);
Rcpp::traits::input_parameter< const std::string >::type varSeq(varSeqSEXP);
// Call the implementation and wrap its std::string result for R.
rcpp_result_gen = Rcpp::wrap(makeBaseHGVS(mutationsSorted, mutNameDelimiter, wtSeq, varSeq));
return rcpp_result_gen;
END_RCPP
}
// test_makeAAHGVS
// Glue between R's .Call() interface and the C++ function test_makeAAHGVS().
// NOTE(review): this looks like Rcpp-generated export code; if so, changes
// belong in the C++ source signature, not here -- confirm before editing.
//
// Forward declaration of the implementation (defined in another translation
// unit). All three parameters are declared by const value, not by reference.
std::string test_makeAAHGVS(const std::vector<std::string> mutationsSorted, const std::string mutNameDelimiter, const std::string wtSeq);
// Exported entry point: takes one SEXP per argument and returns a SEXP.
RcppExport SEXP _mutscan_test_makeAAHGVS(SEXP mutationsSortedSEXP, SEXP mutNameDelimiterSEXP, SEXP wtSeqSEXP) {
// BEGIN_RCPP / END_RCPP bracket the body; per the Rcpp documentation they
// turn C++ exceptions into R errors instead of letting them escape.
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject rcpp_result_gen;
// Scope guard emitted by Rcpp for exported functions (RNG state handling
// per the Rcpp attributes documentation).
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert each incoming SEXP to the C++ type named in the declaration above.
Rcpp::traits::input_parameter< const std::vector<std::string> >::type mutationsSorted(mutationsSortedSEXP);
Rcpp::traits::input_parameter< const std::string >::type mutNameDelimiter(mutNameDelimiterSEXP);
Rcpp::traits::input_parameter< const std::string >::type wtSeq(wtSeqSEXP);
// Call the implementation and wrap its std::string result for R.
rcpp_result_gen = Rcpp::wrap(test_makeAAHGVS(mutationsSorted, mutNameDelimiter, wtSeq));
return rcpp_result_gen;
END_RCPP
}
// test_decomposeRead
List test_decomposeRead(const std::string sseq, const std::string squal, const std::string elements, const std::vector<int> elementLengths, const std::vector<std::string> primerSeqs, std::string umiSeq, std::string varSeq, std::string varQual, std::vector<int> varLengths, std::string constSeq, std::string constQual, int nNoPrimer, int nReadWrongLength);
RcppExport SEXP _mutscan_test_decomposeRead(SEXP sseqSEXP, SEXP squalSEXP, SEXP elementsSEXP, SEXP elementLengthsSEXP, SEXP primerSeqsSEXP, SEXP umiSeqSEXP, SEXP varSeqSEXP, SEXP varQualSEXP, SEXP varLengthsSEXP, SEXP constSeqSEXP, SEXP constQualSEXP, SEXP nNoPrimerSEXP, SEXP nReadWrongLengthSEXP) {
Expand Down Expand Up @@ -243,6 +270,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_mutscan_calcNearestStringDist", (DL_FUNC) &_mutscan_calcNearestStringDist, 3},
{"_mutscan_compareCodonPositions", (DL_FUNC) &_mutscan_compareCodonPositions, 3},
{"_mutscan_translateString", (DL_FUNC) &_mutscan_translateString, 1},
{"_mutscan_makeBaseHGVS", (DL_FUNC) &_mutscan_makeBaseHGVS, 4},
{"_mutscan_test_makeAAHGVS", (DL_FUNC) &_mutscan_test_makeAAHGVS, 3},
{"_mutscan_test_decomposeRead", (DL_FUNC) &_mutscan_test_decomposeRead, 13},
{"_mutscan_test_mergeReadPairPartial", (DL_FUNC) &_mutscan_test_mergeReadPairPartial, 12},
{"_mutscan_findClosestRefSeq", (DL_FUNC) &_mutscan_findClosestRefSeq, 4},
Expand Down
Loading

0 comments on commit 2359969

Please sign in to comment.