From 37b3c57381407b788aa84a6abe64a3a59d56efa8 Mon Sep 17 00:00:00 2001
From: AtaJadidAhari
Date: Mon, 14 Oct 2024 17:31:59 +0200
Subject: [PATCH] set non-external samples' GENE_ANNOTATION to null if provided

---
 drop/config/SampleAnnotation.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drop/config/SampleAnnotation.py b/drop/config/SampleAnnotation.py
index 90a81b2a..485d3e65 100644
--- a/drop/config/SampleAnnotation.py
+++ b/drop/config/SampleAnnotation.py
@@ -33,7 +33,8 @@ def __init__(self, file, root, genome):
         # external counts
         self.extGeneCountIDs = self.createGroupIds(file_type="GENE_COUNTS_FILE", sep=',')
         self.extSpliceCountIDs = self.createGroupIds(file_type="SPLICE_COUNTS_DIR", sep=',')
-
+        self.checkNonExternalGeneAnnotation()
+
     def parse(self, sep='\t'):
         """
         read and check sample annotation for missing columns
@@ -329,3 +330,21 @@ def getIDsByGroup(self, group, assay="RNA"):
     def getSampleIDs(self, file_type):
         ids = self.subsetFileMapping(file_type)["ID"]
         return list(ids)
+
+    def checkNonExternalGeneAnnotation(self):
+        """
+        Ignore `GENE_ANNOTATION` on rows that do not belong to an external-count group.
+
+        A row is treated as non-external when its `DROP_GROUP` value is not a
+        key of `self.extGeneCountIDs` with a non-empty entry. When at least one
+        such row has `GENE_ANNOTATION` filled in, the number of affected rows is
+        logged and `GENE_ANNOTATION` is set to "" on the non-external rows.
+        """
+        external_groups = {g for g in self.extGeneCountIDs if len(self.extGeneCountIDs[g]) > 0}
+        # One boolean row mask drives both the count and the reset, so the rows
+        # that get cleared are exactly the rows that were inspected.
+        non_external = ~self.annotationTable['DROP_GROUP'].isin(external_groups)
+        n_provided = int(self.annotationTable.loc[non_external, 'GENE_ANNOTATION'].notna().sum())
+        if n_provided > 0:
+            logger.info("WARNING: Found %d samples that had `GENE_ANNOTATION` provided in sample annotation table but are not external samples. The provided GENE_ANNOTATIONs are ignored.\n" % n_provided)
+            self.annotationTable.loc[non_external, "GENE_ANNOTATION"] = ""