Skip to content

Commit

Permalink
feat: infer categories of categorical columns (#229)
Browse files (browse the repository at this point in the history)
feat: allow lineup to infer categories
  • Loading branch information
oltionchampari authored Oct 13, 2023
1 parent 3690eea commit 71953f8
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 29 deletions.
13 changes: 7 additions & 6 deletions src/common/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,7 @@ export const tissue: IDataSourceConfig = {
},
};

function toChromosomes(categories: (string | Partial<ICategory>)[]) {
const mappedCategories: Partial<ICategory>[] = categories.map((category) => (typeof category === 'string' ? { name: category, label: category } : category));

function toChromosomes(categories: Partial<ICategory>[]) {
const order = new Map<string, number>();
for (let i = 1; i <= 22; ++i) {
order.set(String(i), i);
Expand All @@ -196,7 +194,7 @@ function toChromosomes(categories: (string | Partial<ICategory>)[]) {
order.set('y', 24);
order.set('mt', 25);

mappedCategories.sort((a, b) => {
categories.sort((a, b) => {
const an = a.label.toLowerCase();
const bn = b.label.toLowerCase();
const ai = order.get(an);
Expand All @@ -213,7 +211,7 @@ function toChromosomes(categories: (string | Partial<ICategory>)[]) {
return ai - bi;
});

return mappedCategories.map((d, i) => ({ name: d.name, label: d.label, value: i }));
return categories;
}

export const gene: IDataSourceConfig = {
Expand All @@ -231,7 +229,10 @@ export const gene: IDataSourceConfig = {
ColumnDescUtils.stringCol('symbol', { label: 'Symbol', width: 120 }),
ColumnDescUtils.stringCol('id', { label: 'Ensembl' }),
ColumnDescUtils.stringCol('name', { label: 'Name' }),
ColumnDescUtils.categoricalCol('chromosome', toChromosomes(find('chromosome').categories), { label: 'Chromosome' }),
ColumnDescUtils.categoricalCol('chromosome', find('chromosome').categories, {
label: 'Chromosome',
extras: { categoryOrder: toChromosomes },
}),
ColumnDescUtils.categoricalCol('biotype', find('biotype').categories, { label: 'Biotype' }),
ColumnDescUtils.categoricalCol(
'strand',
Expand Down
45 changes: 22 additions & 23 deletions tdp_publicdb/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


def _gene_columns(query):
return query.column("biotype", type="categorical").column("chromosome", type="categorical")
return query.column("biotype", type="categorical", categories=None).column("chromosome", type="categorical", categories=None)


gene = Entity(
Expand All @@ -37,31 +37,30 @@ def _gene_columns(query):

def _tissue_columns(query):
return (
query.column("species", type="categorical")
.column("tumortype", type="categorical")
.column("organ", type="categorical")
.column("gender", type="categorical")
query.column("species", type="categorical", categories=None)
.column("tumortype", type="categorical", categories=None)
.column("organ", type="categorical", categories=None)
.column("gender", type="categorical", categories=None)
.column("tumortype_adjacent", type="string")
.column("vendorname", type="categorical")
.column("race", type="categorical")
.column("ethnicity", type="categorical")
.column("vendorname", type="categorical", categories=None)
.column("race", type="categorical", categories=None)
.column("ethnicity", type="categorical", categories=None)
.column("age", type="number")
.column("days_to_death", type="number")
.column("days_to_last_followup", type="number")
.column(
"vital_status",
type="categorical",
categories=[dict(name="true", label="Alive", color="white"), dict(name="false", label="Deceased", color="black")],
)
.column("height", type="number")
.column("weight", type="number")
.column("bmi", type="number")
.column("tumorpurity", type="number")
.column("microsatellite_stability_score", type="number")
.column("microsatellite_stability_class", type="categorical")
.column("microsatellite_stability_class", type="categorical", categories=None)
.column("mutational_fraction", type="number")
.column("hla_a_allele1", type="categorical")
.column("hla_a_allele2", type="categorical")
.column("hla_a_allele1", type="categorical", categories=None)
.column("hla_a_allele2", type="categorical", categories=None)
)


Expand Down Expand Up @@ -106,20 +105,20 @@ def _tissue_columns(query):

def _cellline_columns(query):
return (
query.column("tumortype", type="categorical")
.column("organ", type="categorical")
.column("gender", type="categorical")
.column("metastatic_site", type="categorical")
.column("histology_type", type="categorical")
.column("morphology", type="categorical")
.column("growth_type", type="categorical")
.column("age_at_surgery", type="categorical")
query.column("tumortype", type="categorical", categories=None)
.column("organ", type="categorical", categories=None)
.column("gender", type="categorical", categories=None)
.column("metastatic_site", type="categorical", categories=None)
.column("histology_type", type="categorical", categories=None)
.column("morphology", type="categorical", categories=None)
.column("growth_type", type="categorical", categories=None)
.column("age_at_surgery", type="categorical", categories=None)
.column("cosmicid", type="number")
.column("mutational_fraction", type="number")
.column("microsatellite_stability_score", type="number")
.column("microsatellite_stability_class", type="categorical")
.column("hla_a_allele1", type="categorical")
.column("hla_a_allele2", type="categorical")
.column("microsatellite_stability_class", type="categorical", categories=None)
.column("hla_a_allele1", type="categorical", categories=None)
.column("hla_a_allele2", type="categorical", categories=None)
)


Expand Down

0 comments on commit 71953f8

Please sign in to comment.