-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDraw_Signal_tracks.py
99 lines (67 loc) · 2.68 KB
/
Draw_Signal_tracks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import numpy as np
import scipy
import pandas as pd
import pybedtools as pb
#import matplotlib as mpl
#~ import matplotlib
#~ matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
WORKDIR = '/home/sergio/Res_CIML/TLX3_project'
SCRIPTS = join(WORKDIR,'scripts')
DATADIR = join(WORKDIR,'data')
WGS = join(DATADIR,'tracks/WGS-WES/Germline')
import allel
import deeptools.getScorePerBigWigBin as gs
from scipy.signal import savgol_filter
def kataegis(chrm,vcf_tb, shr=0):
vcf_ch = vcf_tb[vcf_tb['CHROM']==chrm].sort_values('POS',axis=0, ascending=True)
vcf_ch['mut_TYPE'] = vcf_ch['REF']+'>'+vcf_ch['ALT']
vcf_ch['mut_TYPE'][vcf_ch['is_snp']==False]='not_snp'
vcf_ch['DIFF'] = vcf_ch['POS'].diff()
vcf_ch['logDIFF'] = np.log10(vcf_ch['DIFF'])
if shr > 0:
vcf_ch = vcf_ch[vcf_ch['logDIFF']<shr]
f, ax = plt.subplots(figsize=(26.5, 6.5))
hue_order = ['A>C','A>G','A>T','C>A','C>G','C>T','G>A','G>C','G>T','T>A','T>C','T>G', 'not_snp']
sns.scatterplot(x= 'POS', #range(len(vcf_ch)),
y = 'logDIFF',
hue='mut_TYPE',
hue_order = hue_order,
palette= 'tab20_r', #gnuplot_r', #tab20',
ax = ax,
data=vcf_ch)
ax.set_title(chrm)
return f, ax
# BigWig track
bw = join(DATADIR,'tracks/ChiP-seq_tracks/mm9_bigWig/TLX3_H3K27ac_mm9.bw')
#~ bw = join(DATADIR,'tracks/ChiP-seq_tracks/mm9_bigWig/TLX3_H3K36me3_mm9.bw')
#~ bw = join(DATADIR,'tracks/ChiP-seq_tracks/mm9_bigWig/TLX3_POLII_mm9.bw')
# WGS track
vcf_tb = allel.vcf_to_dataframe(join(WGS,'TLX3_WGS.vcf.gz'),fields='*', numbers={'ALT': 1})
vcf_tb = vcf_tb[vcf_tb['FILTER_PASS']==True]
cols = ['CHROM', 'POS', 'REF', 'ALT','is_snp']
for chrm in vcf_tb['CHROM'].unique()[:]:
vcf_f = vcf_tb[vcf_tb['CHROM']==chrm][cols]
f, ax = kataegis(chrm,vcf_f,shr=0)
st = 0
end = vcf_f['POS'].max() #1.5e8
step=200
vl = gs.countFragmentsInRegions_worker(chrm,
int(st),
int(end),
[bw],
stepSize=step,
binLength=step,
save_data=False)
xv = np.arange(st,end,step)
yv = np.squeeze(vl[0])
yvf = savgol_filter(yv,9,3)
ax1 = ax.twinx()
colr = 'r'
ax1.plot(xv,yvf, '--.',color=colr, MarkerSize=0.8, LineWidth=0.1)
ax1.set_ylabel(bw.split('/')[-1].split('.')[0])
ax1.set_ylim(0, 3)
ax1.tick_params(axis='y', labelcolor=colr)
plt.show()