From a94009a5643b01f24069bf8cc89b9c3eaa35fef6 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 17 Nov 2024 19:17:01 +0100 Subject: [PATCH 01/17] Fix pluginsplit --- .../pluginsplit/bcftools-pluginsplit.diff | 15 ++++++++++++++- modules/nf-core/bcftools/pluginsplit/main.nf | 8 ++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff index f06044e4..061e70e4 100644 --- a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff +++ b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff @@ -3,6 +3,19 @@ Changes in module 'nf-core/bcftools/pluginsplit' Changes in 'bcftools/pluginsplit/main.nf': --- modules/nf-core/bcftools/pluginsplit/main.nf +++ modules/nf-core/bcftools/pluginsplit/main.nf +@@ -15,9 +15,9 @@ + path(targets) + + output: +- tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf +- tuple val(meta), path("*.tbi") , emit: tbi, optional: true +- tuple val(meta), path("*.csi") , emit: csi, optional: true ++ tuple val(meta), path("*/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf ++ tuple val(meta), path("*/*.tbi") , emit: tbi, optional: true ++ tuple val(meta), path("*/*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: @@ -26,11 +26,17 @@ script: def args = task.ext.args ?: '' @@ -35,7 +48,7 @@ Changes in 'bcftools/pluginsplit/main.nf': + extension="\${base_name#\${name_before_dot}}" + # Construct the new name + new_name="\${name_before_dot}${suffix}\${extension}" -+ mv "\$file" "./\$new_name" ++ mv "\$file" "${prefix}/\$new_name" + done cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index 8b493087..dce34f5f 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -15,9 +15,9 @@ process BCFTOOLS_PLUGINSPLIT { 
path(targets) output: - tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi, optional: true - tuple val(meta), path("*.csi") , emit: csi, optional: true + tuple val(meta), path("*/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*/*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*/*.csi") , emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -57,7 +57,7 @@ process BCFTOOLS_PLUGINSPLIT { extension="\${base_name#\${name_before_dot}}" # Construct the new name new_name="\${name_before_dot}${suffix}\${extension}" - mv "\$file" "./\$new_name" + mv "\$file" "${prefix}/\$new_name" done cat <<-END_VERSIONS > versions.yml From 4e0ce652ece03c8254cc076467bd24ceb6e70c05 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 17 Nov 2024 19:32:19 +0100 Subject: [PATCH 02/17] Update vcf_split_bcftools --- .../vcf_split_bcftools/tests/main.nf.test | 58 +++++++++++++++++-- .../tests/main.nf.test.snap | 29 ++++++++-- .../vcf_split_bcftools/tests/nextflow.config | 3 + 3 files changed, 80 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test index 46c09660..1864bb76 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/split" - test("Split vcf file") { + test("Split multiple vcf file") { setup { run("BCFTOOLS_MERGE") { script "../../../../modules/nf-core/bcftools/merge/main.nf" @@ -23,7 +23,7 @@ nextflow_workflow { """ input[0] = Channel.of( [ - [id: "allSamples"], + [id: "allSamples.batch0"], [file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExist:true), file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s_imputed.bcf", checkIfExist:true), 
file(params.pipelines_testdata_base_path + "hum_data/individuals/NA20359/NA20359.s_imputed.bcf", checkIfExist:true)], @@ -43,10 +43,6 @@ nextflow_workflow { } } when { - params { - max_cpus = 2 - max_memory = '2.GB' - } workflow { """ input[0] = BCFTOOLS_MERGE.out.vcf.join(BCFTOOLS_MERGE.out.tbi) @@ -74,4 +70,54 @@ nextflow_workflow { ) } } + + test("Split one sample vcf file") { + when { + workflow { + """ + input[0] = Channel.of([ + [id: 'NA12878'], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi", checkIfExist:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] } + ).match() } + ) + } + } + + test("Split no sample vcf file") { + when { + workflow { + """ + input[0] = Channel.of([ + [id: 'dbsnp_146.hg38'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", checkIfExist:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi", checkIfExist:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport.contains("No samples to split: dbsnp_146.hg38.vcf.gz") + } + ) + } + } } diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap index d3bd681e..02a8da3a 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap @@ -1,5 +1,26 @@ { - "Split vcf file": { + "Split one sample vcf file": { + "content": [ + [ + "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" + ], + [ + [ + { + "id": 
"NA12878" + }, + "NA12878.vcf.gz", + "NA12878.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-13T14:06:59.745818945" + }, + "Split multiple vcf file": { "content": [ [ "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" @@ -34,9 +55,9 @@ ] ], "meta": { - "nf-test": "0.9.1", + "nf-test": "0.9.2", "nextflow": "24.10.0" }, - "timestamp": "2024-11-07T14:13:06.801872176" + "timestamp": "2024-11-11T14:42:48.076610625" } -} \ No newline at end of file +} diff --git a/subworkflows/local/vcf_split_bcftools/tests/nextflow.config b/subworkflows/local/vcf_split_bcftools/tests/nextflow.config index 523678dc..a2282fbf 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/nextflow.config +++ b/subworkflows/local/vcf_split_bcftools/tests/nextflow.config @@ -1,7 +1,10 @@ process { + resourceLimits = [cpus: 2, memory: '2.GB'] + withName: BCFTOOLS_MERGE { ext.args = ["--write-index=tbi", "--output-type z"].join(' ') } + withName: BCFTOOLS_PLUGINSPLIT { ext.args = ["--write-index=tbi", "--output-type z"].join(' ') } From 04a12c2a5ac4f0d3ed33135203383ac5b767cbac Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 18 Nov 2024 14:10:04 +0100 Subject: [PATCH 03/17] Fix config --- conf/modules.config | 3 +- .../pluginsplit/bcftools-pluginsplit.diff | 100 +++++++++++++----- modules/nf-core/bcftools/pluginsplit/main.nf | 68 ++++++++---- 3 files changed, 122 insertions(+), 49 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 693570cf..40c9af9e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -88,7 +88,8 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SPLIT_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}_${meta.batch}" } + ext.prefix = { "${meta.id}.${meta.batch}" } + ext.suffix = { ".batch${meta.batch}.${meta.tools}" } publishDir = [ path: { 
"${params.outdir}/imputation/${meta.tools}/samples/" }, mode: params.publish_dir_mode, diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff index 061e70e4..158c59af 100644 --- a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff +++ b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff @@ -3,20 +3,22 @@ Changes in module 'nf-core/bcftools/pluginsplit' Changes in 'bcftools/pluginsplit/main.nf': --- modules/nf-core/bcftools/pluginsplit/main.nf +++ modules/nf-core/bcftools/pluginsplit/main.nf -@@ -15,9 +15,9 @@ +@@ -15,10 +15,10 @@ path(targets) output: - tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi, optional: true - tuple val(meta), path("*.csi") , emit: csi, optional: true +- path "versions.yml" , emit: versions + tuple val(meta), path("*/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*/*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*/*.csi") , emit: csi, optional: true - path "versions.yml" , emit: versions ++ path "versions.yml" , emit: versions when: -@@ -26,11 +26,17 @@ + task.ext.when == null || task.ext.when +@@ -26,6 +26,7 @@ script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -24,36 +26,82 @@ Changes in 'bcftools/pluginsplit/main.nf': def samples_arg = samples ? "--samples-file ${samples}" : "" def groups_arg = groups ? "--groups-file ${groups}" : "" - def regions_arg = regions ? "--regions-file ${regions}" : "" - def targets_arg = targets ? "--targets-file ${targets}" : "" -+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : -+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : -+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : -+ args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : -+ "vcf" - - """ - bcftools plugin split \\ -@@ -42,7 +48,17 @@ +@@ -42,7 +43,22 @@ ${targets_arg} \\ --output ${prefix} - mv ${prefix}/* . -+ for file in ${prefix}/*; do -+ # Extract the basename -+ base_name=\$(basename "\$file") -+ # Extract the part of the basename before the first dot -+ name_before_dot="\${base_name%%.*}" -+ # Extract the extension -+ extension="\${base_name#\${name_before_dot}}" -+ # Construct the new name -+ new_name="\${name_before_dot}${suffix}\${extension}" -+ mv "\$file" "${prefix}/\$new_name" -+ done ++ if [ -n "${suffix}" ]; then ++ for file in ${prefix}/*; do ++ # Extract the basename ++ base_name=\$(basename "\$file") ++ # Extract the extension ++ extension="" ++ # Remove the extension if it exists ++ if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then ++ extension="\${BASH_REMATCH[0]}" ++ base_name="\${base_name%\$extension}" ++ fi ++ # Construct the new name ++ new_name="\${base_name}${suffix}\${extension}" ++ mv "\$file" "${prefix}/\$new_name" ++ done ++ fi cat <<-END_VERSIONS > versions.yml "${task.process}": - +@@ -53,6 +69,7 @@ + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" ++ def suffix = task.ext.suffix ?: "" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : +@@ -65,15 +82,38 @@ + "" + def determination_file = samples ?: targets + def create_cmd = extension.matches("vcf|bcf") ? "touch " : "echo '' | gzip > " +- def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" +- def create_index = index.matches("csi|tbi") ? 
"cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( files.txt ++ while IFS= read -r filename; ++ do ${create_cmd} "${prefix}/\$filename"; ++ if [ -n "${index}" ]; then ++ index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); ++ touch ${prefix}/\$index_file; ++ fi; ++ done < files.txt ++ ++ if [ -n "${suffix}" ]; then ++ for file in ${prefix}/*; do ++ # Extract the basename ++ base_name=\$(basename "\$file") ++ # Extract the extension ++ extension="" ++ # Remove the extension if it exists ++ if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then ++ extension="\${BASH_REMATCH[0]}" ++ base_name="\${base_name%\$extension}" ++ fi ++ # Construct the new name ++ new_name="\${base_name}${suffix}\${extension}" ++ mv "\$file" "${prefix}/\$new_name" ++ done ++ fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +-} ++} 'modules/nf-core/bcftools/pluginsplit/meta.yml' is unchanged 'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test' is unchanged 'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap' is unchanged diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index dce34f5f..0e0fb840 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -18,7 +18,7 @@ process BCFTOOLS_PLUGINSPLIT { tuple val(meta), path("*/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf tuple val(meta), path("*/*.tbi") , emit: tbi, optional: true tuple val(meta), path("*/*.csi") , emit: csi, optional: true - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -32,11 +32,6 @@ process BCFTOOLS_PLUGINSPLIT { def groups_arg = groups ? "--groups-file ${groups}" : "" def regions_arg = regions ? 
"--regions-file ${regions}" : "" def targets_arg = targets ? "--targets-file ${targets}" : "" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf" """ bcftools plugin split \\ @@ -48,17 +43,22 @@ process BCFTOOLS_PLUGINSPLIT { ${targets_arg} \\ --output ${prefix} - for file in ${prefix}/*; do - # Extract the basename - base_name=\$(basename "\$file") - # Extract the part of the basename before the first dot - name_before_dot="\${base_name%%.*}" - # Extract the extension - extension="\${base_name#\${name_before_dot}}" - # Construct the new name - new_name="\${name_before_dot}${suffix}\${extension}" - mv "\$file" "${prefix}/\$new_name" - done + if [ -n "${suffix}" ]; then + for file in ${prefix}/*; do + # Extract the basename + base_name=\$(basename "\$file") + # Extract the extension + extension="" + # Remove the extension if it exists + if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then + extension="\${BASH_REMATCH[0]}" + base_name="\${base_name%\$extension}" + fi + # Construct the new name + new_name="\${base_name}${suffix}\${extension}" + mv "\$file" "${prefix}/\$new_name" + done + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -69,6 +69,7 @@ process BCFTOOLS_PLUGINSPLIT { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : @@ -81,15 +82,38 @@ process BCFTOOLS_PLUGINSPLIT { "" def determination_file = samples ?: targets def create_cmd = extension.matches("vcf|bcf") ? 
"touch " : "echo '' | gzip > " - def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" - def create_index = index.matches("csi|tbi") ? "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( files.txt + while IFS= read -r filename; + do ${create_cmd} "${prefix}/\$filename"; + if [ -n "${index}" ]; then + index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); + touch ${prefix}/\$index_file; + fi; + done < files.txt + + if [ -n "${suffix}" ]; then + for file in ${prefix}/*; do + # Extract the basename + base_name=\$(basename "\$file") + # Extract the extension + extension="" + # Remove the extension if it exists + if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then + extension="\${BASH_REMATCH[0]}" + base_name="\${base_name%\$extension}" + fi + # Construct the new name + new_name="\${base_name}${suffix}\${extension}" + mv "\$file" "${prefix}/\$new_name" + done + fi cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """ -} +} \ No newline at end of file From 8c1fff6684786b269c9de71f917c90f1ccdf9c1f Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 18 Nov 2024 17:44:03 +0100 Subject: [PATCH 04/17] Update pluginsplit --- conf/modules.config | 3 +- conf/steps/validation.config | 3 +- .../pluginsplit/bcftools-pluginsplit.diff | 110 ------------------ modules/nf-core/bcftools/pluginsplit/main.nf | 64 ++++------ nextflow.config | 48 ++++---- 5 files changed, 47 insertions(+), 181 deletions(-) delete mode 100644 modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff diff --git a/conf/modules.config b/conf/modules.config index 40c9af9e..2932d5f4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -88,8 +88,7 @@ process { withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SPLIT_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}.${meta.batch}" } - ext.suffix = { ".batch${meta.batch}.${meta.tools}" } + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.tools}" } publishDir = [ path: { "${params.outdir}/imputation/${meta.tools}/samples/" }, mode: params.publish_dir_mode, diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 10c7df52..285221d6 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -75,8 +75,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}" } - ext.suffix = ".truth" + ext.prefix = { "${meta.id}.truth" } } // Validation subworkflow diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff deleted file mode 100644 index 158c59af..00000000 --- a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff +++ /dev/null @@ -1,110 +0,0 @@ -Changes in module 'nf-core/bcftools/pluginsplit' -'modules/nf-core/bcftools/pluginsplit/environment.yml' is unchanged -Changes in 'bcftools/pluginsplit/main.nf': ---- modules/nf-core/bcftools/pluginsplit/main.nf -+++ modules/nf-core/bcftools/pluginsplit/main.nf -@@ -15,10 +15,10 @@ - path(targets) - - output: -- tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf -- tuple val(meta), path("*.tbi") , emit: tbi, optional: true -- tuple val(meta), path("*.csi") , emit: csi, optional: true -- path "versions.yml" , emit: versions -+ tuple val(meta), path("*/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf -+ tuple val(meta), path("*/*.tbi") , emit: tbi, optional: true -+ tuple val(meta), path("*/*.csi") , emit: csi, optional: true -+ path "versions.yml" , emit: 
versions - - when: - task.ext.when == null || task.ext.when -@@ -26,6 +26,7 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -+ def suffix = task.ext.suffix ?: "" - - def samples_arg = samples ? "--samples-file ${samples}" : "" - def groups_arg = groups ? "--groups-file ${groups}" : "" -@@ -42,7 +43,22 @@ - ${targets_arg} \\ - --output ${prefix} - -- mv ${prefix}/* . -+ if [ -n "${suffix}" ]; then -+ for file in ${prefix}/*; do -+ # Extract the basename -+ base_name=\$(basename "\$file") -+ # Extract the extension -+ extension="" -+ # Remove the extension if it exists -+ if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then -+ extension="\${BASH_REMATCH[0]}" -+ base_name="\${base_name%\$extension}" -+ fi -+ # Construct the new name -+ new_name="\${base_name}${suffix}\${extension}" -+ mv "\$file" "${prefix}/\$new_name" -+ done -+ fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": -@@ -53,6 +69,7 @@ - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -+ def suffix = task.ext.suffix ?: "" - - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : -@@ -65,15 +82,38 @@ - "" - def determination_file = samples ?: targets - def create_cmd = extension.matches("vcf|bcf") ? "touch " : "echo '' | gzip > " -- def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" -- def create_index = index.matches("csi|tbi") ? 
"cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( files.txt -+ while IFS= read -r filename; -+ do ${create_cmd} "${prefix}/\$filename"; -+ if [ -n "${index}" ]; then -+ index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); -+ touch ${prefix}/\$index_file; -+ fi; -+ done < files.txt -+ -+ if [ -n "${suffix}" ]; then -+ for file in ${prefix}/*; do -+ # Extract the basename -+ base_name=\$(basename "\$file") -+ # Extract the extension -+ extension="" -+ # Remove the extension if it exists -+ if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then -+ extension="\${BASH_REMATCH[0]}" -+ base_name="\${base_name%\$extension}" -+ fi -+ # Construct the new name -+ new_name="\${base_name}${suffix}\${extension}" -+ mv "\$file" "${prefix}/\$new_name" -+ done -+ fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ --} -+} -'modules/nf-core/bcftools/pluginsplit/meta.yml' is unchanged -'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test' is unchanged -'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap' is unchanged -'modules/nf-core/bcftools/pluginsplit/tests/nextflow.config' is unchanged -'modules/nf-core/bcftools/pluginsplit/tests/tags.yml' is unchanged -************************************************************ diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index 0e0fb840..df4145fe 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -15,9 +15,9 @@ process BCFTOOLS_PLUGINSPLIT { path(targets) output: - tuple val(meta), path("*/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf - tuple val(meta), path("*/*.tbi") , emit: tbi, optional: true - tuple val(meta), path("*/*.csi") , emit: csi, optional: true + tuple val(meta), path("outputDir/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + 
tuple val(meta), path("outputDir/*.tbi") , emit: tbi, optional: true + tuple val(meta), path("outputDir/*.csi") , emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -25,8 +25,9 @@ process BCFTOOLS_PLUGINSPLIT { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "" + // Here the default prefix is an empty string as the filename is created by the plugin + // and the prefix is instead added to the output files in the script + def prefix = task.ext.prefix ?: "" def samples_arg = samples ? "--samples-file ${samples}" : "" def groups_arg = groups ? "--groups-file ${groups}" : "" @@ -41,22 +42,11 @@ process BCFTOOLS_PLUGINSPLIT { ${groups_arg} \\ ${regions_arg} \\ ${targets_arg} \\ - --output ${prefix} + --output outputDir - if [ -n "${suffix}" ]; then - for file in ${prefix}/*; do - # Extract the basename - base_name=\$(basename "\$file") - # Extract the extension - extension="" - # Remove the extension if it exists - if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then - extension="\${BASH_REMATCH[0]}" - base_name="\${base_name%\$extension}" - fi - # Construct the new name - new_name="\${base_name}${suffix}\${extension}" - mv "\$file" "${prefix}/\$new_name" + if [ -n "${prefix}" ]; then + for file in outputDir/*; do + mv \$file outputDir/${prefix}\${file##*/} done fi @@ -68,8 +58,7 @@ process BCFTOOLS_PLUGINSPLIT { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "" + def prefix = task.ext.prefix ?: "" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : @@ -83,31 +72,20 @@ process BCFTOOLS_PLUGINSPLIT { def determination_file = samples ?: targets def create_cmd = extension.matches("vcf|bcf") ? 
"touch " : "echo '' | gzip > " """ - mkdir -p ${prefix} + mkdir -p outputDir cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt while IFS= read -r filename; - do ${create_cmd} "${prefix}/\$filename"; - if [ -n "${index}" ]; then - index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); - touch ${prefix}/\$index_file; - fi; + do ${create_cmd} "outputDir/\$filename"; + if [ -n "${index}" ]; then + index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); + touch outputDir/\$index_file; + fi; done < files.txt - if [ -n "${suffix}" ]; then - for file in ${prefix}/*; do - # Extract the basename - base_name=\$(basename "\$file") - # Extract the extension - extension="" - # Remove the extension if it exists - if [[ "\$base_name" =~ \\.(vcf|bcf)(\\.gz)?(\\.tbi|\\.csi)?\$ ]]; then - extension="\${BASH_REMATCH[0]}" - base_name="\${base_name%\$extension}" - fi - # Construct the new name - new_name="\${base_name}${suffix}\${extension}" - mv "\$file" "${prefix}/\$new_name" + if [ -n "${prefix}" ]; then + for file in outputDir/*; do + mv \$file outputDir/${prefix}\${file##*/} done fi @@ -116,4 +94,4 @@ process BCFTOOLS_PLUGINSPLIT { bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """ -} \ No newline at end of file +} diff --git a/nextflow.config b/nextflow.config index 3e402c96..5fb08a09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -103,6 +103,30 @@ params { // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// initialisation step +includeConfig 'conf/steps/initialisation.config' + +// chrcheck workflow +includeConfig 'conf/steps/chrcheck.config' + +// simulation step +includeConfig 'conf/steps/simulation.config' + +// panel_prep step +includeConfig 'conf/steps/panel_prep.config' + +// imputation step +includeConfig 'conf/steps/imputation_glimpse1.config' +includeConfig 
'conf/steps/imputation_quilt.config' +includeConfig 'conf/steps/imputation_stitch.config' +includeConfig 'conf/steps/imputation_glimpse2.config' + +// validation step +includeConfig 'conf/steps/validation.config' + profiles { debug { dumpHashes = true @@ -322,27 +346,3 @@ validation { afterText = validation.help.afterText } } - -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// initialisation step -includeConfig 'conf/steps/initialisation.config' - -// chrcheck workflow -includeConfig 'conf/steps/chrcheck.config' - -// simulation step -includeConfig 'conf/steps/simulation.config' - -// panel_prep step -includeConfig 'conf/steps/panel_prep.config' - -// imputation step -includeConfig 'conf/steps/imputation_glimpse1.config' -includeConfig 'conf/steps/imputation_quilt.config' -includeConfig 'conf/steps/imputation_stitch.config' -includeConfig 'conf/steps/imputation_glimpse2.config' - -// validation step -includeConfig 'conf/steps/validation.config' From 8a7f9a154e7c3f5435083763ff3ccc0a90b3ef0a Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 18 Nov 2024 17:57:43 +0100 Subject: [PATCH 05/17] Fix splitting operation --- conf/modules.config | 2 +- conf/steps/validation.config | 2 +- subworkflows/local/vcf_split_bcftools/main.nf | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2932d5f4..804a5c37 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -88,7 +88,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SPLIT_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.tools}" } + ext.prefix = { "${meta.id}-batch${meta.batch}-${meta.tools}-" } publishDir = [ path: { "${params.outdir}/imputation/${meta.tools}/samples/" }, mode: params.publish_dir_mode, diff --git 
a/conf/steps/validation.config b/conf/steps/validation.config index 285221d6..3d5cc072 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -75,7 +75,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}.truth" } + ext.prefix = { "${meta.id}-truth-" } } // Validation subworkflow diff --git a/subworkflows/local/vcf_split_bcftools/main.nf b/subworkflows/local/vcf_split_bcftools/main.nf index 524b3e88..a0df60a6 100644 --- a/subworkflows/local/vcf_split_bcftools/main.nf +++ b/subworkflows/local/vcf_split_bcftools/main.nf @@ -13,11 +13,12 @@ workflow VCF_SPLIT_BCFTOOLS { ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf .transpose() - .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0]], vcf]} + .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0].tokenize("-")[-1]], vcf]} ch_tbi_samples = BCFTOOLS_PLUGINSPLIT.out.tbi .transpose() - .map{metaITC, tbi -> [metaITC + [id: tbi.getBaseName().tokenize(".")[0]], tbi]} + .view() + .map{metaITC, tbi -> [metaITC + [id: tbi.getBaseName().tokenize(".")[0].tokenize("-")[-1]], tbi]} ch_vcf_tbi_samples = ch_vcf_samples .join(ch_tbi_samples) From 7846c23564364823171f786ddd74bbea5c40a0f9 Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 18 Nov 2024 18:01:21 +0100 Subject: [PATCH 06/17] Fix splitting operation --- conf/modules.config | 2 +- subworkflows/local/vcf_split_bcftools/main.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 804a5c37..a6e466fd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -93,7 +93,7 @@ process { path: { "${params.outdir}/imputation/${meta.tools}/samples/" }, mode: params.publish_dir_mode, enabled: true, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename.tokenize("-")[-1] } ] } diff --git a/subworkflows/local/vcf_split_bcftools/main.nf b/subworkflows/local/vcf_split_bcftools/main.nf index a0df60a6..2e6d741d 100644 --- a/subworkflows/local/vcf_split_bcftools/main.nf +++ b/subworkflows/local/vcf_split_bcftools/main.nf @@ -19,6 +19,7 @@ workflow VCF_SPLIT_BCFTOOLS { .transpose() .view() .map{metaITC, tbi -> [metaITC + [id: tbi.getBaseName().tokenize(".")[0].tokenize("-")[-1]], tbi]} + .view() ch_vcf_tbi_samples = ch_vcf_samples .join(ch_tbi_samples) From 2eb5f0057e3ce3dd137dcc75770466e1dd4f70f8 Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Mon, 18 Nov 2024 18:04:47 +0100 Subject: [PATCH 07/17] Fix splitting operation --- conf/steps/validation.config | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 3d5cc072..38da7887 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -65,19 +65,16 @@ process { } // Split by samples - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:.*' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { + ext.args = ["--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}-truth" } publishDir = [ path: { "${params.outdir}/validation/samples" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename.tokenize("-")[-1] } ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { - ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}-truth-" } - } - // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ From a2b15da10fe0ad2259efb30d2d1c3a4c7462cfc3 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 18 Nov 2024 23:02:20 +0100 Subject: [PATCH 08/17] Update truth split --- conf/steps/validation.config | 2 +- workflows/phaseimpute/tests/main.nf.test.snap | 54 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 38da7887..ffe908e5 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -67,7 +67,7 @@ process { // Split by samples withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}-truth" } + ext.prefix = { "${meta.id}-truth-" } publishDir = [ path: { "${params.outdir}/validation/samples" }, mode: params.publish_dir_mode, diff --git a/workflows/phaseimpute/tests/main.nf.test.snap b/workflows/phaseimpute/tests/main.nf.test.snap index 336b9585..163e702c 100644 --- a/workflows/phaseimpute/tests/main.nf.test.snap +++ b/workflows/phaseimpute/tests/main.nf.test.snap @@ -130,15 +130,15 @@ "validation/TestQuality.txt", "validation/concat/all.A-truth.vcf.gz", "validation/concat/all.A-truth.vcf.gz.tbi", - "validation/samples/NA12878.truth.vcf.gz", - "validation/samples/NA12878.truth.vcf.gz.tbi", - "validation/samples/NA19401.truth.vcf.gz", - "validation/samples/NA19401.truth.vcf.gz.tbi", - "validation/samples/NA20359.truth.vcf.gz", - "validation/samples/NA20359.truth.vcf.gz.tbi", - "validation/stats/NA12878.truth.bcftools_stats.txt", - "validation/stats/NA19401.truth.bcftools_stats.txt", - 
"validation/stats/NA20359.truth.bcftools_stats.txt" + "validation/samples/NA12878.vcf.gz", + "validation/samples/NA12878.vcf.gz.tbi", + "validation/samples/NA19401.vcf.gz", + "validation/samples/NA19401.vcf.gz.tbi", + "validation/samples/NA20359.vcf.gz", + "validation/samples/NA20359.vcf.gz.tbi", + "validation/stats/NA12878.bcftools_stats.txt", + "validation/stats/NA19401.bcftools_stats.txt", + "validation/stats/NA20359.bcftools_stats.txt" ], "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=false]", [ @@ -178,15 +178,15 @@ "validation/NA19401_SNP.txt", "validation/NA20359_SNP.txt", "validation/TestQuality.txt", - "validation/samples/NA12878.truth.vcf.gz", - "validation/samples/NA12878.truth.vcf.gz.tbi", - "validation/samples/NA19401.truth.vcf.gz", - "validation/samples/NA19401.truth.vcf.gz.tbi", - "validation/samples/NA20359.truth.vcf.gz", - "validation/samples/NA20359.truth.vcf.gz.tbi", - "validation/stats/NA12878.truth.bcftools_stats.txt", - "validation/stats/NA19401.truth.bcftools_stats.txt", - "validation/stats/NA20359.truth.bcftools_stats.txt" + "validation/samples/NA12878.vcf.gz", + "validation/samples/NA12878.vcf.gz.tbi", + "validation/samples/NA19401.vcf.gz", + "validation/samples/NA19401.vcf.gz.tbi", + "validation/samples/NA20359.vcf.gz", + "validation/samples/NA20359.vcf.gz.tbi", + "validation/stats/NA12878.bcftools_stats.txt", + "validation/stats/NA19401.bcftools_stats.txt", + "validation/stats/NA20359.bcftools_stats.txt" ] ], "meta": { @@ -256,15 +256,15 @@ "validation/NA20359_P1000GP_Tglimpse2_SNP.txt", "validation/NA20359_P1000GP_Tquilt_SNP.txt", "validation/TestQuality.txt", - "validation/samples/NA12878.truth.vcf.gz", - "validation/samples/NA12878.truth.vcf.gz.tbi", - "validation/samples/NA19401.truth.vcf.gz", - "validation/samples/NA19401.truth.vcf.gz.tbi", - "validation/samples/NA20359.truth.vcf.gz", - "validation/samples/NA20359.truth.vcf.gz.tbi", - "validation/stats/NA12878.truth.bcftools_stats.txt", - 
"validation/stats/NA19401.truth.bcftools_stats.txt", - "validation/stats/NA20359.truth.bcftools_stats.txt" + "validation/samples/NA12878.vcf.gz", + "validation/samples/NA12878.vcf.gz.tbi", + "validation/samples/NA19401.vcf.gz", + "validation/samples/NA19401.vcf.gz.tbi", + "validation/samples/NA20359.vcf.gz", + "validation/samples/NA20359.vcf.gz.tbi", + "validation/stats/NA12878.bcftools_stats.txt", + "validation/stats/NA19401.bcftools_stats.txt", + "validation/stats/NA20359.bcftools_stats.txt" ] ], "meta": { From c1b7f1ee978750de86a26a58391019b8aff08bbe Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 18 Nov 2024 23:48:40 +0100 Subject: [PATCH 09/17] Update snap --- docs/output.md | 2 +- workflows/phaseimpute/tests/main.nf.test.snap | 38 ++----------------- 2 files changed, 4 insertions(+), 36 deletions(-) diff --git a/docs/output.md b/docs/output.md index 6e6777a4..55dc7321 100644 --- a/docs/output.md +++ b/docs/output.md @@ -168,7 +168,7 @@ The results from `--steps validate` will have the following directory structure: - `samples/*.vcf.gz`: A VCF of each truth sample. - `samples/*.vcf.gz.tbi`: A tbi for the truth VCF. - `stats/`: - - `*.truth.bcftools_stats.txt`: The statistics of the truth VCF target file produced by [`BCFTOOLS_STATS`](https://samtools.github.io/bcftools/bcftools.html#stats.) + - `*.bcftools_stats.txt`: The statistics of the truth VCF target file produced by [`BCFTOOLS_STATS`](https://samtools.github.io/bcftools/bcftools.html#stats.) - `*.P_T_SNP.txt`: Concordance metrics of the SNP variants obtained with [`GLIMPSE2_CONCORDANCE`](https://odelaneau.github.io/GLIMPSE/docs/documentation/concordance/). - `AllSamples.txt`: Aggregation of the above `GLIMPSE_CONCORDANCE` output across samples and tools. 
diff --git a/workflows/phaseimpute/tests/main.nf.test.snap b/workflows/phaseimpute/tests/main.nf.test.snap index b5d30026..383192e4 100644 --- a/workflows/phaseimpute/tests/main.nf.test.snap +++ b/workflows/phaseimpute/tests/main.nf.test.snap @@ -123,9 +123,6 @@ "validation/samples/NA19401.vcf.gz.tbi", "validation/samples/NA20359.vcf.gz", "validation/samples/NA20359.vcf.gz.tbi", - "validation/stats/NA12878.bcftools_stats.txt", - "validation/stats/NA19401.bcftools_stats.txt", - "validation/stats/NA20359.bcftools_stats.txt" "validation/stats/AllSamples.txt", "validation/stats/NA12878.truth.bcftools_stats.txt", "validation/stats/NA12878_P1000GP_Tglimpse1_SNP.txt", @@ -172,30 +169,17 @@ "nf-test": "0.9.2", "nextflow": "24.10.0" }, - "timestamp": "2024-11-17T21:32:08.538026091" + "timestamp": "2024-11-18T23:36:12.634065627" }, "Check test_validate": { "content": [ [ - "validation/NA12878_SNP.txt", - "validation/NA19401_SNP.txt", - "validation/NA20359_SNP.txt", - "validation/TestQuality.txt", "validation/samples/NA12878.vcf.gz", "validation/samples/NA12878.vcf.gz.tbi", "validation/samples/NA19401.vcf.gz", "validation/samples/NA19401.vcf.gz.tbi", "validation/samples/NA20359.vcf.gz", "validation/samples/NA20359.vcf.gz.tbi", - "validation/stats/NA12878.bcftools_stats.txt", - "validation/stats/NA19401.bcftools_stats.txt", - "validation/stats/NA20359.bcftools_stats.txt" - "validation/samples/NA12878.truth.vcf.gz", - "validation/samples/NA12878.truth.vcf.gz.tbi", - "validation/samples/NA19401.truth.vcf.gz", - "validation/samples/NA19401.truth.vcf.gz.tbi", - "validation/samples/NA20359.truth.vcf.gz", - "validation/samples/NA20359.truth.vcf.gz.tbi", "validation/stats/AllSamples.txt", "validation/stats/NA12878.truth.bcftools_stats.txt", "validation/stats/NA12878_SNP.txt", @@ -209,7 +193,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.0" }, - "timestamp": "2024-11-06T21:16:53.209347713" + "timestamp": "2024-11-18T23:30:21.177496237" }, "Check test_batch": { "content": [ @@ -265,28 
+249,12 @@ "NA20359" ], [ - "validation/NA12878_P1000GP_Tglimpse2_SNP.txt", - "validation/NA12878_P1000GP_Tquilt_SNP.txt", - "validation/NA19401_P1000GP_Tglimpse2_SNP.txt", - "validation/NA19401_P1000GP_Tquilt_SNP.txt", - "validation/NA20359_P1000GP_Tglimpse2_SNP.txt", - "validation/NA20359_P1000GP_Tquilt_SNP.txt", - "validation/TestQuality.txt", "validation/samples/NA12878.vcf.gz", "validation/samples/NA12878.vcf.gz.tbi", "validation/samples/NA19401.vcf.gz", "validation/samples/NA19401.vcf.gz.tbi", "validation/samples/NA20359.vcf.gz", "validation/samples/NA20359.vcf.gz.tbi", - "validation/stats/NA12878.bcftools_stats.txt", - "validation/stats/NA19401.bcftools_stats.txt", - "validation/stats/NA20359.bcftools_stats.txt" - "validation/samples/NA12878.truth.vcf.gz", - "validation/samples/NA12878.truth.vcf.gz.tbi", - "validation/samples/NA19401.truth.vcf.gz", - "validation/samples/NA19401.truth.vcf.gz.tbi", - "validation/samples/NA20359.truth.vcf.gz", - "validation/samples/NA20359.truth.vcf.gz.tbi", "validation/stats/AllSamples.txt", "validation/stats/NA12878.truth.bcftools_stats.txt", "validation/stats/NA12878_P1000GP_Tglimpse2_SNP.txt", @@ -303,7 +271,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.0" }, - "timestamp": "2024-11-06T21:03:44.505303287" + "timestamp": "2024-11-18T23:47:48.301571495" }, "Check test_quilt": { "content": [ From eeb80375973d7a92d66206660cd8581ee1967a16 Mon Sep 17 00:00:00 2001 From: Louis Le Nezet Date: Thu, 21 Nov 2024 16:19:40 +0100 Subject: [PATCH 10/17] Update bcftools pluginsplit --- conf/modules.config | 3 +- conf/steps/validation.config | 5 +-- modules/nf-core/bcftools/pluginsplit/main.nf | 42 ++++++------------- subworkflows/local/vcf_split_bcftools/main.nf | 6 +-- 4 files changed, 17 insertions(+), 39 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a6e466fd..65d412dd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -88,12 +88,11 @@ process { withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SPLIT_BCFTOOLS:BCFTOOLS_PLUGINSPLIT' { tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}-batch${meta.batch}-${meta.tools}-" } publishDir = [ path: { "${params.outdir}/imputation/${meta.tools}/samples/" }, mode: params.publish_dir_mode, enabled: true, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename.tokenize("-")[-1] } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/conf/steps/validation.config b/conf/steps/validation.config index a059b360..2c023a07 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -67,11 +67,10 @@ process { // Split by samples withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { ext.args = ["--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}-truth-" } publishDir = [ path: { "${params.outdir}/validation/samples" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename.tokenize("-")[-1] } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } @@ -102,6 +101,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { ext.args2 = "'(NR == 1) || (FNR > 1)'" // Skip header line ext.suffix = { "txt" } - tag = {"Test Quality"} + tag = {"${meta.id}"} } } diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index df4145fe..082802be 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -15,19 +15,17 @@ process BCFTOOLS_PLUGINSPLIT { path(targets) output: - tuple val(meta), path("outputDir/*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf - tuple val(meta), path("outputDir/*.tbi") , emit: tbi, optional: true - tuple val(meta), path("outputDir/*.csi") , emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - // Here the default prefix is an empty string as the filename is created by the plugin - // and the prefix is instead added to the output files in the script - def prefix = task.ext.prefix ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" def samples_arg = samples ? "--samples-file ${samples}" : "" def groups_arg = groups ? "--groups-file ${groups}" : "" @@ -42,13 +40,9 @@ process BCFTOOLS_PLUGINSPLIT { ${groups_arg} \\ ${regions_arg} \\ ${targets_arg} \\ - --output outputDir + --output ${prefix} - if [ -n "${prefix}" ]; then - for file in outputDir/*; do - mv \$file outputDir/${prefix}\${file##*/} - done - fi + mv ${prefix}/* . 
cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -58,7 +52,7 @@ process BCFTOOLS_PLUGINSPLIT { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : @@ -71,23 +65,11 @@ process BCFTOOLS_PLUGINSPLIT { "" def determination_file = samples ?: targets def create_cmd = extension.matches("vcf|bcf") ? "touch " : "echo '' | gzip > " + def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" + def create_index = index.matches("csi|tbi") ? "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( files.txt - while IFS= read -r filename; - do ${create_cmd} "outputDir/\$filename"; - if [ -n "${index}" ]; then - index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); - touch outputDir/\$index_file; - fi; - done < files.txt - - if [ -n "${prefix}" ]; then - for file in outputDir/*; do - mv \$file outputDir/${prefix}\${file##*/} - done - fi + ${create_files} + ${create_index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/vcf_split_bcftools/main.nf b/subworkflows/local/vcf_split_bcftools/main.nf index 2e6d741d..524b3e88 100644 --- a/subworkflows/local/vcf_split_bcftools/main.nf +++ b/subworkflows/local/vcf_split_bcftools/main.nf @@ -13,13 +13,11 @@ workflow VCF_SPLIT_BCFTOOLS { ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf .transpose() - .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0].tokenize("-")[-1]], vcf]} + .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0]], vcf]} ch_tbi_samples = BCFTOOLS_PLUGINSPLIT.out.tbi .transpose() - .view() - .map{metaITC, tbi -> [metaITC + [id: 
tbi.getBaseName().tokenize(".")[0].tokenize("-")[-1]], tbi]} - .view() + .map{metaITC, tbi -> [metaITC + [id: tbi.getBaseName().tokenize(".")[0]], tbi]} ch_vcf_tbi_samples = ch_vcf_samples .join(ch_tbi_samples) From 0bd49f0f3fc8b5d491488e60c4ad87af43b3e7d2 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Nov 2024 15:33:34 +0100 Subject: [PATCH 11/17] Update modules --- modules.json | 7 +- .../pluginsplit/bcftools-pluginsplit.diff | 22 ++++ modules/nf-core/bcftools/pluginsplit/main.nf | 21 ++-- .../bcftools/pluginsplit/tests/main.nf.test | 38 +++++-- .../pluginsplit/tests/main.nf.test.snap | 51 ++++++--- .../bcftools/query}/environment.yml | 2 +- modules/nf-core/bcftools/query/main.nf | 56 ++++++++++ modules/nf-core/bcftools/query/meta.yml | 67 ++++++++++++ .../nf-core/bcftools/query/tests/main.nf.test | 101 ++++++++++++++++++ .../bcftools/query/tests/main.nf.test.snap | 55 ++++++++++ .../bcftools/query/tests/nextflow.config | 3 + modules/nf-core/bcftools/query/tests/tags.yml | 2 + 12 files changed, 390 insertions(+), 35 deletions(-) create mode 100644 modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff rename modules/{local/list_to_file => nf-core/bcftools/query}/environment.yml (65%) create mode 100644 modules/nf-core/bcftools/query/main.nf create mode 100644 modules/nf-core/bcftools/query/meta.yml create mode 100644 modules/nf-core/bcftools/query/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/query/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/query/tests/nextflow.config create mode 100644 modules/nf-core/bcftools/query/tests/tags.yml diff --git a/modules.json b/modules.json index 6db896db..72f0a33e 100644 --- a/modules.json +++ b/modules.json @@ -46,10 +46,15 @@ }, "bcftools/pluginsplit": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "14c910af1f9c20c65e5df9325a1e4d3939d524d1", "installed_by": ["modules"], "patch": 
"modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff" }, + "bcftools/query": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "bcftools/stats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff new file mode 100644 index 00000000..5e3092d8 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/bcftools/pluginsplit' +'modules/nf-core/bcftools/pluginsplit/environment.yml' is unchanged +Changes in 'bcftools/pluginsplit/main.nf': +--- modules/nf-core/bcftools/pluginsplit/main.nf ++++ modules/nf-core/bcftools/pluginsplit/main.nf +@@ -8,8 +8,7 @@ + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: +- tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*") +- path(samples) ++ tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*"), path(samples) + path(groups) + path(regions) + path(targets) + +'modules/nf-core/bcftools/pluginsplit/meta.yml' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/nextflow.config' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf index 082802be..d1977fbc 100644 --- a/modules/nf-core/bcftools/pluginsplit/main.nf +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -8,8 +8,7 @@ process BCFTOOLS_PLUGINSPLIT { 'biocontainers/bcftools:1.20--h8b25389_0' }" input: - tuple val(meta), path(vcf), path(tbi) - path(samples) + tuple val(meta), 
path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*"), path(samples) path(groups) path(regions) path(targets) @@ -25,7 +24,6 @@ process BCFTOOLS_PLUGINSPLIT { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" def samples_arg = samples ? "--samples-file ${samples}" : "" def groups_arg = groups ? "--groups-file ${groups}" : "" @@ -40,9 +38,7 @@ process BCFTOOLS_PLUGINSPLIT { ${groups_arg} \\ ${regions_arg} \\ ${targets_arg} \\ - --output ${prefix} - - mv ${prefix}/* . + --output . cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -52,7 +48,6 @@ process BCFTOOLS_PLUGINSPLIT { stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : @@ -65,11 +60,15 @@ process BCFTOOLS_PLUGINSPLIT { "" def determination_file = samples ?: targets def create_cmd = extension.matches("vcf|bcf") ? "touch " : "echo '' | gzip > " - def create_files = "cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt; while IFS= read -r filename; do ${create_cmd} \"\$filename\"; done < files.txt" - def create_index = index.matches("csi|tbi") ? 
"cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}.${index}/' > indices.txt; touch \$( files.txt + while IFS= read -r filename; + do ${create_cmd} "./\$filename"; + if [ -n "${index}" ]; then + index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); + touch ./\$index_file; + fi; + done < files.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test index e3160851..e7ae574e 100644 --- a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test @@ -67,7 +67,6 @@ nextflow_process { } test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi") { - config "./nextflow.config" when { @@ -91,16 +90,43 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.vcf, - process.out.tbi.get(0).get(1).find { file(it).name.matches("normal.vcf.gz.tbi|tumor.vcf.gz.tbi") }, - ) } + process.out.vcf.collect{ it[1].collect { file(it).name } }, + process.out.tbi.collect{ it[1].collect { file(it).name } }, + ).match() } ) } } - test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") { + test("homo_sapiens - [ vcf, tbi ], [], [], [], [], - error no sample") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("No samples to split: input/dbsnp_146.hg38.vcf.gz") } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") { options "-stub" 
when { @@ -130,7 +156,6 @@ nextflow_process { } test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub") { - options "-stub" when { @@ -160,7 +185,6 @@ nextflow_process { } test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub") { - config "./nextflow.config" options "-stub" diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap index 66c3c1dd..b915b7cf 100644 --- a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap @@ -47,18 +47,39 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:56:42.307673651" + "timestamp": "2024-11-20T14:56:54.383979416" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi": { + "content": [ + [ + [ + "normal.vcf.gz", + "tumour.vcf.gz" + ] + ], + [ + [ + "normal.vcf.gz.tbi", + "tumour.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:56:44.796391578" }, "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets": { "content": null, "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:56:21.498991402" + "timestamp": "2024-11-20T14:56:36.709842966" }, "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub": { "content": [ @@ -126,10 +147,10 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:57:04.483688966" + "timestamp": "2024-11-20T14:57:11.163588435" }, "homo_sapiens - [ vcf, tbi ], samples, [], [], []": { "content": [ @@ -179,10 +200,10 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": 
"2024-07-09T15:56:10.033818589" + "timestamp": "2024-11-20T14:56:27.978161766" }, "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub": { "content": [ @@ -232,9 +253,9 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-07-09T15:56:53.641165787" + "timestamp": "2024-11-20T14:57:02.456908152" } } \ No newline at end of file diff --git a/modules/local/list_to_file/environment.yml b/modules/nf-core/bcftools/query/environment.yml similarity index 65% rename from modules/local/list_to_file/environment.yml rename to modules/nf-core/bcftools/query/environment.yml index 315f6dc6..5c00b116 100644 --- a/modules/local/list_to_file/environment.yml +++ b/modules/nf-core/bcftools/query/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::gawk=5.3.0 + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..58019f4d --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,56 @@ +process BCFTOOLS_QUERY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? 
"--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + """ + bcftools query \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $args \\ + $vcf \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..279b3205 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,67 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. +keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be qeuried. + pattern: "*.{vcf.gz, vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
+ - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + - - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..39e67b35 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..3ead1f2c --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:44.916249758" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.txt", + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:49.932359271" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": 
[ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:39.930697926" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..da81c2a0 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/tags.yml b/modules/nf-core/bcftools/query/tests/tags.yml new file mode 100644 index 00000000..fb9455cb --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/query: + - "modules/nf-core/bcftools/query/**" From c1fd1868f249942d4708c90424de22e11fc4183f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Nov 2024 15:42:34 +0100 Subject: [PATCH 12/17] Update vcf_split_bcftools --- subworkflows/local/vcf_split_bcftools/main.nf | 4 ++-- .../vcf_split_bcftools/tests/main.nf.test | 17 +++++++++----- .../tests/main.nf.test.snap | 22 +++++++++---------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/subworkflows/local/vcf_split_bcftools/main.nf b/subworkflows/local/vcf_split_bcftools/main.nf index 524b3e88..72832214 100644 --- a/subworkflows/local/vcf_split_bcftools/main.nf +++ b/subworkflows/local/vcf_split_bcftools/main.nf @@ -2,13 +2,13 @@ include { BCFTOOLS_PLUGINSPLIT } from '../../../modules/nf-core/bcftools/plugin workflow VCF_SPLIT_BCFTOOLS { take: - ch_vcf // channel: [ [id, chr, tools], vcf, index ] + ch_vcf // channel: [ [id, chr, tools], vcf, index, samples ] main: ch_versions = Channel.empty() - BCFTOOLS_PLUGINSPLIT(ch_vcf, [], [], [], []) + BCFTOOLS_PLUGINSPLIT(ch_vcf, [], [], []) ch_versions = 
ch_versions.mix(BCFTOOLS_PLUGINSPLIT.out.versions.first()) ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test index 1864bb76..b85b2be9 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/split" - test("Split multiple vcf file") { + test("Split multiple vcf file - with renaming") { setup { run("BCFTOOLS_MERGE") { script "../../../../modules/nf-core/bcftools/merge/main.nf" @@ -45,7 +45,12 @@ nextflow_workflow { when { workflow { """ - input[0] = BCFTOOLS_MERGE.out.vcf.join(BCFTOOLS_MERGE.out.tbi) + renaming_file = channel.of( + "NA12878\tNA12878_test NA12878.myfile", + "NA19401\t-\tNA19401", + "NA20359\tNA20359_2\tNA20359_3" + ).collectFile(name: "samples.txt", newLine: true) + input[0] = BCFTOOLS_MERGE.out.vcf.join(BCFTOOLS_MERGE.out.tbi).combine(renaming_file) """ } } @@ -78,7 +83,8 @@ nextflow_workflow { input[0] = Channel.of([ [id: 'NA12878'], file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExist:true), - file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi", checkIfExist:true) + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi", checkIfExist:true), + [] ]) """ } @@ -106,7 +112,8 @@ nextflow_workflow { input[0] = Channel.of([ [id: 'dbsnp_146.hg38'], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", checkIfExist:true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi", checkIfExist:true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi", checkIfExist:true), + [] ]) """ } @@ -115,7 +122,7 @@ 
nextflow_workflow { then { assertAll( { assert workflow.failed }, - { assert workflow.errorReport.contains("No samples to split: dbsnp_146.hg38.vcf.gz") + { assert workflow.errorReport.contains("No samples to split: input/dbsnp_146.hg38.vcf.gz") } ) } diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap index 02a8da3a..d6084e40 100644 --- a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap @@ -16,11 +16,11 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-13T14:06:59.745818945" + "timestamp": "2024-11-22T13:53:09.194659411" }, - "Split multiple vcf file": { + "Split multiple vcf file - with renaming": { "content": [ [ "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" @@ -30,8 +30,8 @@ { "id": "NA12878" }, - "NA12878.vcf.gz", - "NA12878.vcf.gz.tbi" + "NA12878.myfile.vcf.gz", + "NA12878.myfile.vcf.gz.tbi" ], [ { @@ -42,10 +42,10 @@ ], [ { - "id": "NA20359" + "id": "NA20359_3" }, - "NA20359.vcf.gz", - "NA20359.vcf.gz.tbi" + "NA20359_3.vcf.gz", + "NA20359_3.vcf.gz.tbi" ] ], [ @@ -56,8 +56,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-11T14:42:48.076610625" + "timestamp": "2024-11-22T13:53:01.127637055" } -} +} \ No newline at end of file From cd450c313f8178ab4307ccccaa2181d512a3a6f4 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Nov 2024 15:43:39 +0100 Subject: [PATCH 13/17] Add samples renaming for truth splitted files --- conf/steps/validation.config | 14 ++++ conf/test.config | 2 +- conf/test_all.config | 2 +- workflows/phaseimpute/main.nf | 11 ++- workflows/phaseimpute/tests/main.nf.test.snap | 72 +++++++++---------- 5 files changed, 61 insertions(+), 40 deletions(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 2c023a07..3a9b85cb 
100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -64,6 +64,20 @@ process { ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') } + // Compute sample files for renaming + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.args = '--list-samples' + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.prefix = { "${meta.id}_samples"} + ext.args2 = "'BEGIN { OFS = \"\\t\" } { print \$1, \"-\", \$1\".truth\" }'" + publishDir = [enabled: false] + } + // Split by samples withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { ext.args = ["--output-type z", "--write-index=tbi"].join(' ') diff --git a/conf/test.config b/conf/test.config index f5c7b8dd..d3d19505 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 4, - memory: '15.GB', + memory: '7.GB', time: '1.h' ] } diff --git a/conf/test_all.config b/conf/test_all.config index c16d7904..fc2e845f 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '10.GB', + memory: '7.GB', time: '1.h' ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 4ca49bea..34ea9e40 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -65,6 +65,8 @@ include { BCFTOOLS_STATS as BCFTOOLS_STATS_TOOLS } from '../../modules/nf-co // Concordance subworkflows include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query' +include { GAWK } from '../../modules/nf-core/gawk' include { VCF_SPLIT_BCFTOOLS as SPLIT_TRUTH } from '../../subworkflows/local/vcf_split_bcftools' include { BCFTOOLS_STATS as BCFTOOLS_STATS_TRUTH } from 
'../../modules/nf-core/bcftools/stats' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/local/vcf_concatenate_bcftools' @@ -372,7 +374,7 @@ workflow PHASEIMPUTE { } // Split result by samples - VCF_SPLIT_BCFTOOLS(ch_input_validate) + VCF_SPLIT_BCFTOOLS(ch_input_validate.map{ [it[0], it[1], it[2], []] }) ch_input_validate = VCF_SPLIT_BCFTOOLS.out.vcf_tbi // Compute stats on imputed files @@ -443,8 +445,13 @@ workflow PHASEIMPUTE { CONCAT_TRUTH(ch_truth_vcf) ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + // Prepare renaming file + BCFTOOLS_QUERY(CONCAT_TRUTH.out.vcf_tbi, [], [], []) + GAWK(BCFTOOLS_QUERY.out.output, []) + ch_pluginsplit = CONCAT_TRUTH.out.vcf_tbi.join(GAWK.out.output.view()) + // Split truth vcf by samples - SPLIT_TRUTH(CONCAT_TRUTH.out.vcf_tbi) + SPLIT_TRUTH(ch_pluginsplit) ch_versions = ch_versions.mix(SPLIT_TRUTH.out.versions) // Compute stats on truth files diff --git a/workflows/phaseimpute/tests/main.nf.test.snap b/workflows/phaseimpute/tests/main.nf.test.snap index 383192e4..ed71c96a 100644 --- a/workflows/phaseimpute/tests/main.nf.test.snap +++ b/workflows/phaseimpute/tests/main.nf.test.snap @@ -25,10 +25,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:47:19.203956394" + "timestamp": "2024-11-22T13:07:42.012993182" }, "Check test_all": { "content": [ @@ -117,12 +117,12 @@ [ "validation/concat/all.A-truth.vcf.gz", "validation/concat/all.A-truth.vcf.gz.tbi", - "validation/samples/NA12878.vcf.gz", - "validation/samples/NA12878.vcf.gz.tbi", - "validation/samples/NA19401.vcf.gz", - "validation/samples/NA19401.vcf.gz.tbi", - "validation/samples/NA20359.vcf.gz", - "validation/samples/NA20359.vcf.gz.tbi", + "validation/samples/NA12878.truth.vcf.gz", + "validation/samples/NA12878.truth.vcf.gz.tbi", + "validation/samples/NA19401.truth.vcf.gz", + "validation/samples/NA19401.truth.vcf.gz.tbi", + 
"validation/samples/NA20359.truth.vcf.gz", + "validation/samples/NA20359.truth.vcf.gz.tbi", "validation/stats/AllSamples.txt", "validation/stats/NA12878.truth.bcftools_stats.txt", "validation/stats/NA12878_P1000GP_Tglimpse1_SNP.txt", @@ -167,19 +167,19 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-18T23:36:12.634065627" + "timestamp": "2024-11-22T13:58:54.188813202" }, "Check test_validate": { "content": [ [ - "validation/samples/NA12878.vcf.gz", - "validation/samples/NA12878.vcf.gz.tbi", - "validation/samples/NA19401.vcf.gz", - "validation/samples/NA19401.vcf.gz.tbi", - "validation/samples/NA20359.vcf.gz", - "validation/samples/NA20359.vcf.gz.tbi", + "validation/samples/NA12878.truth.vcf.gz", + "validation/samples/NA12878.truth.vcf.gz.tbi", + "validation/samples/NA19401.truth.vcf.gz", + "validation/samples/NA19401.truth.vcf.gz.tbi", + "validation/samples/NA20359.truth.vcf.gz", + "validation/samples/NA20359.truth.vcf.gz.tbi", "validation/stats/AllSamples.txt", "validation/stats/NA12878.truth.bcftools_stats.txt", "validation/stats/NA12878_SNP.txt", @@ -191,9 +191,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-18T23:30:21.177496237" + "timestamp": "2024-11-22T13:54:09.435194577" }, "Check test_batch": { "content": [ @@ -249,12 +249,12 @@ "NA20359" ], [ - "validation/samples/NA12878.vcf.gz", - "validation/samples/NA12878.vcf.gz.tbi", - "validation/samples/NA19401.vcf.gz", - "validation/samples/NA19401.vcf.gz.tbi", - "validation/samples/NA20359.vcf.gz", - "validation/samples/NA20359.vcf.gz.tbi", + "validation/samples/NA12878.truth.vcf.gz", + "validation/samples/NA12878.truth.vcf.gz.tbi", + "validation/samples/NA19401.truth.vcf.gz", + "validation/samples/NA19401.truth.vcf.gz.tbi", + "validation/samples/NA20359.truth.vcf.gz", + "validation/samples/NA20359.truth.vcf.gz.tbi", "validation/stats/AllSamples.txt", 
"validation/stats/NA12878.truth.bcftools_stats.txt", "validation/stats/NA12878_P1000GP_Tglimpse2_SNP.txt", @@ -269,9 +269,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "24.10.1" }, - "timestamp": "2024-11-18T23:47:48.301571495" + "timestamp": "2024-11-22T14:06:57.642618122" }, "Check test_quilt": { "content": [ @@ -299,10 +299,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:46:11.131198832" + "timestamp": "2024-11-22T13:05:58.709941089" }, "Check test_sim": { "content": [ @@ -380,10 +380,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:44:41.049965503" + "timestamp": "2024-11-22T13:03:28.516026252" }, "Check test": { "content": [ @@ -463,9 +463,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-25T16:56:33.72923425" + "timestamp": "2024-11-22T13:16:12.803136748" } } \ No newline at end of file From 2e540952f61aa036cc07c8fac00689bfdc069b93 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Nov 2024 15:48:00 +0100 Subject: [PATCH 14/17] Reset changes --- conf/test.config | 2 +- conf/test_all.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index d3d19505..f5c7b8dd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 4, - memory: '7.GB', + memory: '15.GB', time: '1.h' ] } diff --git a/conf/test_all.config b/conf/test_all.config index fc2e845f..c16d7904 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '7.GB', + memory: '10.GB', time: '1.h' ] } From ca67cbb9ad8f67a2446a930df101f1905ff1dce3 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Nov 2024 15:49:47 
+0100 Subject: [PATCH 15/17] Reset changes --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 55dc7321..6e6777a4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -168,7 +168,7 @@ The results from `--steps validate` will have the following directory structure: - `samples/*.vcf.gz`: A VCF of each truth sample. - `samples/*.vcf.gz.tbi`: A tbi for the truth VCF. - `stats/`: - - `*.bcftools_stats.txt`: The statistics of the truth VCF target file produced by [`BCFTOOLS_STATS`](https://samtools.github.io/bcftools/bcftools.html#stats.) + - `*.truth.bcftools_stats.txt`: The statistics of the truth VCF target file produced by [`BCFTOOLS_STATS`](https://samtools.github.io/bcftools/bcftools.html#stats). - `*.P_T_SNP.txt`: Concordance metrics of the SNP variants obtained with [`GLIMPSE2_CONCORDANCE`](https://odelaneau.github.io/GLIMPSE/docs/documentation/concordance/). - `AllSamples.txt`: Aggregation of the above `GLIMPSE_CONCORDANCE` output across samples and tools. 
From 46464ebb172f3bba932183b8e53934746c554805 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Nov 2024 15:51:57 +0100 Subject: [PATCH 16/17] Reset changes --- modules/local/list_to_file/environment.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 modules/local/list_to_file/environment.yml diff --git a/modules/local/list_to_file/environment.yml b/modules/local/list_to_file/environment.yml new file mode 100644 index 00000000..315f6dc6 --- /dev/null +++ b/modules/local/list_to_file/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 From d3aac0c486fc6fb2d7f71de96b83b6ac21440f8a Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sat, 23 Nov 2024 15:51:26 +0100 Subject: [PATCH 17/17] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cea3088..3a1b8d40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ Special thanks to [Matthias Hörtenhuber](https://github.com/mashehu) and [Mazza - [#117](https://github.com/nf-core/phaseimpute/pull/117) - Fix directories in CSV. - [#151](https://github.com/nf-core/phaseimpute/pull/151) - Fix `Type not supported: class org.codehaus.groovy.runtime.GStringImpl` error due to `String` test in `getFileExtension()`. - [#153](https://github.com/nf-core/phaseimpute/pull/153) - Fix getFileExtension function. Fix image in `usage.md`. Fix small warnings and errors with updated language server. `def` has been added when necessary, `:` use instead of `,` in assertions, `_` added to variables not used in closures, `for` loop replaced by `.each{}`, remove unused code / input. +- [#161](https://github.com/nf-core/phaseimpute/pull/161) - Fix `VCF_SPLIT_BCFTOOLS` when only one sample is present by updating `BCFTOOLS_PLUGINSPLIT` and adding `BCFTOOLS_QUERY` to get the truth sample names used for renaming the resulting files. ### `Dependencies`