-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathhaphic
executable file
·153 lines (113 loc) · 5.53 KB
/
haphic
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Xiaofei Zeng
# Email: xiaofei_zeng@whu.edu.cn
# Created Time: 2022-09-13 22:47
"""This code is only a wrapper for HapHiC scripts"""
import os
import sys
import subprocess
try:
from importlib.metadata import distribution
except:
from pkg_resources import get_distribution as distribution
from scripts._version import __version__, __update_time__
# Banner shown together with the help message: an ASCII-art logo followed by
# author / email / version / update lines. Every line is wrapped in ANSI SGR
# escape sequences (\033[42;37m or \033[47;32m ... \033[0m) to color the
# terminal output. The two {} placeholders are filled with __version__ and
# __update_time__ imported from scripts._version.
TITLE_DOC = """
\033[42;37m ______ __ ______ _______________ \033[0m
\033[42;37m ___ / / /_____ ___________ / / /__(_)_ ____/ \033[0m
\033[42;37m __ /_/ /_ __ `/__ __ \\_ /_/ /__ /_ / \033[0m
\033[42;37m _ __ / / /_/ /__ /_/ / __ / _ / / /___ \033[0m
\033[42;37m /_/ /_/ \\__,_/ _ .___//_/ /_/ /_/ \\____/ \033[0m
\033[42;37m /_/ \033[0m
\033[42;37m HapHiC: A fast, reference-independent, \033[0m
\033[42;37m allele-aware scaffolding tool based on Hi-C data \033[0m
\033[47;32m Author : Xiaofei Zeng \033[0m
\033[47;32m Email : xiaofei_zeng@whu.edu.cn \033[0m
\033[47;32m Version : {} \033[0m
\033[47;32m Update : {} \033[0m
""".format(__version__, __update_time__)
# Top-level usage message listing every command the wrapper accepts.
# It is printed (after TITLE_DOC) when the script is run without arguments,
# with -h/--help, or right before the error raised for an unknown command.
HELP_DOC = """
Usage: haphic [command] <parameters>
Command Description
-------- ----------------------------------------------------------------------
pipeline Execute an automated pipeline consisting of "cluster", "reassign",
"sort", and "build". For better result through parameter tuning
or to parallelize the "sort" step on a computer cluster, you may
run the commands below individually:
cluster [Step1] Execute preprocessing and Markov clustering for contigs.
reassign [Step2] Reassign and rescue contigs. Perform additional agglomerative
hierarchical clustering if necessary.
sort [Step3] Order and orient contigs within each group.
build [Step4] Build final scaffolds (pseudomolecules).
plot Draw Hi-C contact map for the whole genome or each scaffold.
check Check whether all dependencies are correctly installed.
refsort Order and orient whole scaffolds based on a reference genome.
allhic Call a modified version of ALLHiC, in which the hot-start optimization
has been fixed (source code: https://github.com/zengxiaofei/allhic).
juicer Show the guide to generate files for Juicebox visualization.
Use haphic [command] --help/-h to see detailed help for each individual command.
Visit the GitHub repository for more information:
https://github.com/zengxiaofei/HapHiC
"""
# Step-by-step guide for producing Juicebox visualization files with external
# tools (3D-DNA, matlock, juicebox_scripts). The wrapper only prints this text
# for the juice / juicer / juicebox commands; it does not run these tools.
JUICE_DOC = """
[Step 0] Install 3D-DNA, matlock, and Juicebox scripts
(1) 3D-DNA: https://github.com/aidenlab/3d-dna
(2) matlock: https://github.com/phasegenomics/matlock
(3) Juicebox scripts: https://github.com/phasegenomics/juicebox_scripts
[Step 1] Generate .mnd files
$ /path/to/matlock bam2 juicer <filtered_bam> out.links.mnd
$ sort -k2,2 -k6,6 out.links.mnd > out.sorted.links.mnd
[Step 2] Generate .assembly file
$ /path/to/juicebox_scripts/agp2assembly.py scaffolds.agp scaffolds.assembly
[Step 3] Generate .hic file
$ bash /path/to/3d-dna/visualize/run-assembly-visualizer.sh -p false scaffolds.assembly out.sorted.links.mnd
"""
# Command name -> program file name (looked up in the "scripts" directory).
# Every Python sub-command follows the "HapHiC_<command>.py" naming scheme;
# "allhic" is the only entry whose program name does not follow it.
param_dict = {
    name: 'HapHiC_{}.py'.format(name)
    for name in (
        'cluster',
        'reassign',
        'sort',
        'build',
        'refsort',
        'plot',
        'pipeline',
    )
}
param_dict['allhic'] = 'allhic'
def check_packages(pkg, pkg_name):
    """Report whether a Python dependency can be imported.

    Prints a single ANSI-colored status line to stdout and returns None.

    Args:
        pkg: Module name handed to ``__import__`` (e.g. ``'sklearn'``).
        pkg_name: Distribution name used in the messages and for the
            version lookup (e.g. ``'scikit-learn'``).
    """
    # Flush so the package name is already visible if the import hangs or
    # crashes the interpreter.
    print('Checking {}... '.format(pkg_name), end='', flush=True)
    try:
        __import__(pkg)
    except Exception:
        # Any import-time error (not only ImportError) makes the dependency
        # unusable. Unlike a bare ``except``, Ctrl-C and SystemExit propagate.
        print('\033[31mFailed. {} is not correctly installed.\033[0m'.format(pkg_name))
        return
    try:
        version = distribution(pkg_name).version
    except Exception:
        # The module imported fine; missing distribution metadata alone must
        # not be reported as a broken installation.
        version = 'unknown'
    print('\033[32mSuccessful (version: {}).\033[0m'.format(version))
# Dispatch on the first command-line argument (None when no argument is given).
cli_args = sys.argv[1:]
command = cli_args[0] if cli_args else None

if command in param_dict:
    # Sub-command: run the matching program from the "scripts" directory next
    # to this file, forwarding the remaining arguments. With no extra
    # arguments the sub-command's own help (-h) is requested instead.
    scripts_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'scripts')
    forwarded_args = cli_args[1:] or ['-h']
    subprocess.run(
        [os.path.join(scripts_dir, param_dict[command])] + forwarded_args,
        check=True)
elif command is None or command in ('-h', '--help'):
    # No argument or an explicit help flag: show banner and usage.
    print(TITLE_DOC)
    print(HELP_DOC)
elif command == 'check':
    # (importable module name, distribution name) for each dependency
    dependencies = (
        ('pysam', 'pysam'),
        ('portion', 'portion'),
        ('numpy', 'numpy'),
        ('scipy', 'scipy'),
        ('sklearn', 'scikit-learn'),
        ('networkx', 'networkx'),
        ('sparse_dot_mkl', 'sparse_dot_mkl'),
        ('matplotlib', 'matplotlib'),
    )
    for module_name, dist_name in dependencies:
        check_packages(module_name, dist_name)
elif command in ('-v', '--version'):
    print('HapHiC', __version__)
elif command.lower() in ('juice', 'juicer', 'juicebox'):
    # Guide for downstream Juicebox visualization (matched case-insensitively).
    print(JUICE_DOC)
else:
    # Unknown command: show banner and usage, then fail loudly.
    print(TITLE_DOC)
    print(HELP_DOC)
    raise RuntimeError('Bad command or parameters! Please check the help message above.')