-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchromlink_cmm.py
103 lines (100 loc) · 2.8 KB
/
chromlink_cmm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python3
import argparse, os
import numpy as np
import pandas as pd
def file_path(string):
    """Validate a path given on the command line.

    Meant to be passed as the ``type=`` callable of an argparse argument.

    Args:
        string: Path exactly as typed on the command line.

    Returns:
        The same string, unchanged, when something exists at that path.

    Raises:
        argparse.ArgumentTypeError: If nothing exists at ``string``.
    """
    if os.path.exists(string):
        return string
    # argparse turns ArgumentTypeError (but not OSError subclasses such as
    # NotADirectoryError) into a regular "usage: ... error: ..." message,
    # so the user sees a clean error instead of a traceback.
    raise argparse.ArgumentTypeError(f"path does not exist: {string}")
# Command-line interface: three input paths (validated by file_path) and one
# output path. Parsed once at start-up into `args`.
parser = argparse.ArgumentParser(
    prog='chromlink_cmm',
    description="""
    Link chromosomes together in CMM files.
    """
)
# arguments
parser.add_argument(
    '-in',
    '--inFile',
    type=file_path,
    help='Path to the Chimera .cmm file to be updated',
    required=True
)
parser.add_argument(
    '-out',
    '--outFile',
    type=str,
    # The output path is always opened for writing further down, so it must
    # be given together with a value. (Previously nargs='?' let a bare "-out"
    # through as None, which then failed inside open().)
    help='Path to the output cmm file.',
    required=True
)
parser.add_argument(
    '-rgb',
    '--rgbFile',
    type=file_path,
    help="""
    Path and name of a .csv file with only three fields named r,g,b and
    values where the row number corresponds to the chromosome number; no headers.
    """,
    # Optional: when omitted, the built-in colour table is used.
    required=False
)
parser.add_argument(
    '-L',
    '--lFile',
    type=file_path,
    help='Path to the L-file with chromosome label per bin, i.e. L=[1,1,1,...,24]',
    required=True
)
args = parser.parse_args()
# Chromosome label of every bin, read as raw unsigned bytes (one per bin).
L_vec = np.fromfile(args.lFile, dtype='uint8')
# Distinct labels in ascending order, as plain Python ints.
chromosomes = np.unique(L_vec).tolist()

# Default colors as used in Chromflock:
chromosome_colors = {
    1: (240, 163, 255),
    2: (0, 117, 220),
    3: (153, 63, 0),
    4: (76, 0, 92),
    5: (25, 25, 25),
    6: (0, 92, 49),
    7: (43, 206, 72),
    8: (255, 204, 153),
    9: (128, 128, 128),
    10: (148, 255, 181),
    11: (143, 124, 0),
    12: (157, 204, 0),
    13: (194, 0, 136),
    14: (0, 51, 128),
    15: (255, 164, 5),
    16: (255, 168, 187),
    17: (66, 102, 0),
    18: (255, 0, 16),
    19: (94, 241, 242),
    20: (0, 153, 143),
    21: (224, 255, 102),
    22: (116, 10, 255),
    23: (153, 0, 0),
    24: (255, 255, 128)
}
if args.rgbFile:
    # A user-supplied table replaces the defaults entirely: CSV row k
    # (1-based, no header row) holds the colour of chromosome k.
    ccols = pd.read_csv(args.rgbFile, sep=',', header=None).values
    chromosome_colors = {k: row for k, row in enumerate(ccols, start=1)}

# Scale the 0-255 channel values to fractions, rounded to 4 decimals.
chrom_colors = {
    c: np.round(np.array(rgb) / 255, 4) for c, rgb in chromosome_colors.items()
}

# Check the colour table up front so that a label without a colour produces
# an explanatory error before anything is written to the output file.
missing_colors = [c for c in chromosomes if c not in chrom_colors]
if missing_colors:
    raise KeyError(
        f"no colour defined for chromosome label(s) {missing_colors} "
        f"found in {args.lFile}"
    )

# Processing links and markers
# NOTE(review): link ids are the 0-based positions of the bins in the L-file;
# this presumes the marker ids in the input .cmm use the same numbering - confirm.
cmm_links = []
for c in chromosomes:
    # Positions of all bins carrying this label, in file order.
    chrom_bins = np.flatnonzero(L_vec == c).tolist()
    rgb = chrom_colors[c]
    # One link per pair of successive bins of the same chromosome; a
    # chromosome with a single bin yields no links.
    cmm_links.extend(
        f'<link id1="{id1}" id2="{id2}" r="{rgb[0]}" g="{rgb[1]}" b="{rgb[2]}" radius="0.006251"/>\n'
        for id1, id2 in zip(chrom_bins, chrom_bins[1:])
    )

# Keep only the marker-set opening tag and the marker lines of the input;
# every other line (including any existing links and the closing tag) is dropped.
with open(args.inFile, 'r') as infile:
    cmm_markers = [
        line for line in infile
        if line.startswith(("<marker_set", "<marker id"))
    ]

# Output layout: kept marker lines, then the new links, then the closing tag.
with open(args.outFile, 'w') as outfile:
    outfile.writelines(cmm_markers)
    outfile.writelines(cmm_links)
    outfile.write("</marker_set>")