Skip to content

Commit

Permalink
fix: check all tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dmartmillan committed Dec 21, 2021
1 parent 0178e40 commit fa4c17f
Show file tree
Hide file tree
Showing 50 changed files with 1,670,985 additions and 242,722 deletions.
2 changes: 1 addition & 1 deletion annotation_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pattern:
- '*.maf'
- '*.vcf'
- '*.tsv.gz'
recursive: true
# UNDER DEVELOPMENT - recursive: true
format: 'tsv'
delimiter: 't'

Expand Down
2 changes: 1 addition & 1 deletion annotation_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pattern: # List of file patterns to match with the current annotation; required
- string
- string
- ...
recursive: boolean # Explore annotations recursively to all subdirectories; default: False
# UNDER DEVELOPMENT - recursive: boolean # Explore annotations recursively to all subdirectories; default: False
format: string # Output format "tsv" or "csv"; default: "tsv"
delimiter: string # Delimiter of input files: t - tab, c - comma; default: "t"

Expand Down
13 changes: 8 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import os
from os import getcwd

from openvariant.annotation.annotation import Annotation
from openvariant.task.count import count
from openvariant.task.groupby import group_by
from openvariant.variant.variant import Variant

annotation = Annotation('./tests/data/example.yaml')
result = Variant('./tests', annotation)
for r in result.read():
print(r)
print(result)
for g, v, _ in group_by(f'{os.getcwd()}/tests/data/dataset/', f'{os.getcwd()}/tests/data/task_test.yaml',
None, key_by='DATASET', where="PROJECT >= \"SAMPLE1\"", quite=True):
print(g, len(v))
51 changes: 3 additions & 48 deletions openvariant/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,8 @@
from yaml import safe_load, YAMLError

from openvariant.annotation.builder import AnnotationTypesBuilders
from openvariant.config.config_annotation import (AnnotationGeneralKeys,
AnnotationKeys,
AnnotationTypes,
ExcludesKeys,
DEFAULT_FORMAT,
DEFAULT_DELIMITER,
from openvariant.config.config_annotation import (AnnotationGeneralKeys, AnnotationKeys, AnnotationTypes,
ExcludesKeys, DEFAULT_FORMAT, DEFAULT_DELIMITER,
DEFAULT_COLUMNS, AnnotationFormat, AnnotationDelimiter)


Expand Down Expand Up @@ -117,8 +113,7 @@ def _check_annotation_keys(annot: dict) -> None:
not isinstance(annot[AnnotationKeys.FIELD_MAPPING.value], str) or
not isinstance(annot[AnnotationKeys.FILE_MAPPING.value], str) or
not isinstance(annot[AnnotationKeys.FIELD_VALUE.value], str)):
raise KeyError(
f"'{AnnotationTypes.MAPPING.value}' not annotated well.")
raise KeyError(f"'{AnnotationTypes.MAPPING.value}' not annotated well.")


class Annotation:
Expand Down Expand Up @@ -159,10 +154,6 @@ def _check_columns(self) -> None:
if col not in self._annotations:
raise KeyError(f"'{col}' column unable to find.")

# @property
# def recursive(self) -> bool:
# return self._recursive

@property
def patterns(self) -> List[str]:
return self._patterns
Expand All @@ -189,42 +180,6 @@ def excludes(self) -> List:

@property
def structure(self) -> dict:

structure_aux = {AnnotationGeneralKeys.ANNOTATION.name: self._annotations,
AnnotationGeneralKeys.EXCLUDE.name: self._excludes}
return {e: structure_aux for e in self._patterns}


'''
def merge_annotations_structure(ann_a: Annotation, ann_b: Annotation) -> Annotation:
"""
:param ann_a: The first Annotation. This annotation
has preference and will override B annotation if there is a conflict
:param ann_b: The second Annotation.
:return: The merge of A and B annotation
"""
if ann_a is None:
return copy.deepcopy(ann_b)
elif ann_b is None:
return copy.deepcopy(ann_a)
ann_aa = copy.deepcopy(ann_a)
ann_aa.set_patterns(list(set(ann_aa.patterns).union(set(ann_b.patterns))))
excludes_total = ann_aa.excludes
for k in ann_b.excludes:
if k not in excludes_total:
excludes_total.append(k)
ann_aa.set_excludes(excludes_total)
aa = {k: v for k, v in ann_aa.annotations.items()}
for k, v in ann_b.annotations.items():
if k not in list(aa.keys()):
aa[k] = v
ann_aa.set_annotations(aa)
return ann_aa
'''
11 changes: 7 additions & 4 deletions openvariant/annotation/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,15 @@ def _dirname_parser(x: DirnameBuilder, line: List = None, original_header: List

def _plugin_parser(x: PluginBuilder, line: List = None, original_header: List = None, path: str = None,
dict_line: dict = None) -> dict:
if x[2] is None:
raise KeyError("Unable to get plugin\'s function")
if dict_line is None:
dict_line = {}
try:
if dict_line is None:
dict_line = {}
value = x[2](dict_line)
except KeyError as e:
raise KeyError(f'Something went wrong on the plugin: {e}')
except Exception as e:
raise Exception(f'Something went wrong on the plugin: {e}')

#if len(x[1]) != 0:
# value = _get_text_from_header(x[1], line, original_header, None)
return value if value is not None else str(float('nan'))
Expand Down
2 changes: 0 additions & 2 deletions openvariant/task/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def _count_task(selection: Tuple[str, Annotation], group_by: str, where: str) ->
return i, None
else:
groups = {}
# key_not_found = False
for r in result.read(group_by):
if skip(r, where_clauses):
continue
Expand All @@ -38,7 +37,6 @@ def _count_task(selection: Tuple[str, Annotation], group_by: str, where: str) ->
i += 1
except (ValueError, KeyError):
pass

return i, groups


Expand Down
12 changes: 7 additions & 5 deletions openvariant/task/openvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ def cat(input_path: str, where: str or None, annotations: str or None, header: b
@click.option('--cores', '-c', help='Maximum processes to run in parallel.', type=click.INT, default=cpu_count())
@click.option('--quite', '-q', help="Don't show the progress, only the total count.", is_flag=True)
def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None) -> None:
print(group_by)
result = count_task(input_path, annotations, group_by=group_by, where=where, cores=cores, quite=quite)

print(result)
print(group_by)
if len(result[1]) > 0:
for k, v in sorted(result[1].items(), key=lambda res: res[1]):
print("{}\t{}".format(k, v))
Expand All @@ -44,13 +46,13 @@ def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, a
@click.argument('input_path', type=click.Path(exists=True), default='.')
@click.option('--header', help='Send header as first row', is_flag=True)
@click.option('--show', help='Show group by each row', is_flag=True)
@click.option('--group_by', '-g', type=click.STRING)
@click.option('--where', '-w', type=click.STRING)
@click.option('--script', '-s', type=click.STRING)
@click.option('--group_by', '-g', type=click.STRING, default=None)
@click.option('--where', '-w', type=click.STRING, default=None)
@click.option('--script', '-s', type=click.STRING, default=None)
@click.option('--annotations', '-a', default=None, type=click.Path(exists=True))
@click.option('--cores', '-c', help='Maximum processes to run in parallel.', type=click.INT, default=cpu_count())
@click.option('--quite', '-q', help="Don't show the progress, only the total count.", is_flag=True)
def groupby(input_path: str, script: str, where: str, group_by: str, cores: int, quite: bool, annotations: str,
def groupby(input_path: str, script: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None,
header: bool, show: bool):
for group_key, group_result, command in group_by_task(input_path, annotations, script, key_by=group_by, where=where,
cores=cores, quite=quite, header=header):
Expand Down
9 changes: 6 additions & 3 deletions openvariant/variant/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,15 @@ def _parser(file: str, annotation: dict, delimiter: str, columns: List, excludes

if len(columns) != 0:
if group_by is not None and group_by not in columns:
row_aux[group_by] = row[group_by]
try:
row_aux[group_by] = row[group_by]
except KeyError as e:
raise KeyError(f"Unable to find group by: {e}. Check annotation for {file} file")

for col in columns:
row_aux[col] = row[col]

row = row_aux

except (ValueError, IndexError, KeyError) as e:
raise ValueError(f"Error parsing line: {lnum} {file}: {e}")

Expand Down Expand Up @@ -196,7 +199,7 @@ def read(self, group_key=None) -> Generator[dict, None, None]:
if i != 0:
yield line

def save(self, file_path: str, display_header=True):
def save(self, file_path: str or None, display_header=True):
if file_path is None or isdir(file_path):
raise ValueError("The path must be a file.")
with open(file_path, "w") as file:
Expand Down
2 changes: 1 addition & 1 deletion tests/data/annotation/invalid_pattern.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pattern: '*.vcf.gz'
pattern: 1234
recursive: false
format: 'CSV'

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pattern:
- '*.maf'
recursive: true
format: 'tsv'

annotation:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
28 changes: 28 additions & 0 deletions tests/data/dataset/sample2/sample2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
pattern:
- '*.maf'
format: 'tsv'

annotation:
- type: 'static'
field: 'PLATFORM'
value: 'WGS'
- type: 'internal'
field: 'POSITION'
fieldSource:
- 'Position'
- 'Start'
- 'Start_Position'
- 'Pos'
- 'Chromosome_Start'
- 'POS'
- type: 'internal'
field: 'variant'
fieldSource:
- 'Variant_Type'
- 'Data'
- type: 'filename'
field: 'DATASET'
function: 'lambda x: "{}".format(x.lower()[:-4])'
- type: 'dirname'
field: 'PROJECT'
function: 'lambda x: "{}".format(x.upper())'
67 changes: 67 additions & 0 deletions tests/data/dataset/sample3/sample3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
pattern:
- data_mutations_extended.txt
format: tsv

columns:
- CHROMOSOME
- POSITION
- REF
- ALT
- STRAND
- ALT_TYPE
- PLATFORM

annotation:
- type: internal
field: CHROMOSOME
fieldSource:
- Chromosome
- Chr
- Chrom
- Chromosome_Name
- '#chrom'
- '#CHROM'
function: 'lambda c: c.upper().replace(''CHR'', '''').replace(''23'', ''X'').replace(''24'', ''Y'')'
- type: internal
field: POSITION
fieldSource:
- Position
- Start
- Start_Position
- Pos
- Chromosome_Start
- POS
- type: internal
field: STRAND
fieldSource:
- Strand
- Chromosome_Strand
- ''
function: 'lambda s: ''-'' if s in [''-'', ''0'', ''-1''] else ''+'' if s in [''+'', ''1'', ''+1''] else ''+'''
- type: internal
field: REF
fieldSource:
- Ref
- Reference_Allele
- Reference
- Reference_Genome_Allele
- REF
- type: internal
field: ALT
fieldSource:
- Alt
- Tumor_Seq_Allele2
- Variant
- Alternative
- Mutated_To_Allele
- ALT
- type: static
field: PLATFORM
value: WXS
- type: dirname
field: DATASET
function: 'lambda d: "CBIOP_{{PLATFORM}}_{}".format(d.upper())'
- type: plugin
plugin: alteration_type
field: ALT_TYPE

2 changes: 1 addition & 1 deletion tests/data/example.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pattern:
patternexample1:
- '*.maf'
- '*.vcf.gz'
recursive: false
Expand Down
21 changes: 0 additions & 21 deletions tests/data/example2/example2.yaml

This file was deleted.

15 changes: 0 additions & 15 deletions tests/data/example3/example3.yaml

This file was deleted.

35 changes: 0 additions & 35 deletions tests/data/example4/internationalgenome-example.vcf

This file was deleted.

Binary file not shown.
Loading

0 comments on commit fa4c17f

Please sign in to comment.