-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchipipe.sh
124 lines (98 loc) · 2.66 KB
/
chipipe.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/bin/bash
# chipipe.sh - pipeline entry script.
#
# Usage: chipipe.sh <params.file>
#
# Reads a parameters file, builds a workspace, indexes the genome with
# bowtie2-build and submits one sample_proc.sh job per replica via sbatch.

# Command-line validation: exactly one argument (the parameters file) is
# required. Diagnostics go to stderr and the script leaves with a non-zero
# status so that callers can detect the failure.
if [ $# -ne 1 ]
then
    {
        echo "Number of arguments is: $#"
        echo "Please, only 1 argument for this function!"
        echo "Usage chipipe.sh <params.file>"
    } >&2
    exit 1
fi

# Every later step depends on values read from this file, so stop right away
# when it cannot be read instead of continuing with empty settings.
if [ ! -r "$1" ]
then
    echo "Cannot read parameters file: $1" >&2
    exit 1
fi
PARAMS=$1

#######################################
# Print the value of one parameter.
# The value is the second whitespace-separated field of the first line whose
# FIRST field contains the key. Looking only at the first field keeps values
# (for instance a path that happens to contain "experiment" or "genome") from
# being mistaken for keys. An occurrence of the key that is directly followed
# by a digit is skipped, so path_sample_chip_1 does not pick up
# path_sample_chip_10.
# Arguments: $1 - key (literal text, not a regular expression)
#            $2 - parameters file
# Outputs:   the value on stdout; nothing when the key is absent
#######################################
param_value() {
    awk -v key="$1" '
        {
            pos = index($1, key)
            if (pos == 0) next
            nxt = substr($1, pos + length(key), 1)
            if (nxt ~ /[0-9]/) next
            print $2
            exit
        }
    ' < "$2"
}

#######################################
# Read every pipeline setting from the parameters file and report it.
# Globals written: INSDIR WD EXP NUMREPLICAS GENOME ANNOTATION CHR PEAK
#                  SINGLE TSSUP TSSDOWN CHIPS INPUTS
# Arguments: $1 - parameters file
# Outputs:   one line per setting, then the sample lists, on stdout
#######################################
load_params() {
    local file=$1 idx

    INSDIR=$(param_value installation "$file")
    echo "Installation directory is: $INSDIR"
    WD=$(param_value working "$file")
    echo "Working directory is: $WD"
    EXP=$(param_value experiment "$file")
    echo "Experiment directory is: $EXP"
    NUMREPLICAS=$(param_value number_replicas "$file")
    echo " Number of replica: $NUMREPLICAS"
    GENOME=$(param_value genome "$file")
    echo "Genome directory is: $GENOME"
    ANNOTATION=$(param_value annotation "$file")
    echo "Annotation directory is: $ANNOTATION"
    CHR=$(param_value chromosomes "$file")
    echo "Universe of chromosomes directory is: $CHR"
    PEAK=$(param_value peak "$file")
    echo "Peak type: $PEAK"
    SINGLE=$(param_value single "$file")
    echo "SINGLE or PAIRED: $SINGLE"
    TSSUP=$(param_value upstream "$file")
    echo "TSS region upstream is: $TSSUP"
    TSSDOWN=$(param_value downstream "$file")
    echo "TSS region downstream is: $TSSDOWN"

    # Sample paths: element k of each array belongs to replica k+1.
    CHIPS=()
    INPUTS=()
    case "$SINGLE" in
        1)
            for ((idx = 1; idx <= NUMREPLICAS; idx++)); do
                CHIPS+=("$(param_value "path_sample_chip_$idx" "$file")")
                INPUTS+=("$(param_value "path_sample_input_$idx" "$file")")
            done
            ;;
        2)
            # Paired-end reads are not supported yet; only report it.
            echo "no hay paired todavia"
            ;;
        *)
            echo "No allowed input for single/paired end reads determination"
            ;;
    esac

    echo "Samples = "
    echo "${CHIPS[@]}"
    echo "${INPUTS[@]}"
}

load_params "$PARAMS"
# Generating work space: create <WD>/<EXP> with its sub-directories, copy the
# genome and the annotation into it and, for single-end data, copy the
# chip/input reads of every replica under samples/replica_<n>/.
# The section ends with <WD>/<EXP>/samples as the current directory.
echo "====================="
echo "GENERATING WORK SPACE"
echo "====================="
# A failed cd (or an empty WD/EXP) would build the workspace in whatever
# directory the script happens to be in, so stop instead of carrying on.
cd -- "${WD:?working directory is not set}" || exit 1
mkdir -p -- "${EXP:?experiment directory is not set}"
cd -- "$EXP" || exit 1
mkdir -p genome annotation results samples scripts
cp -- "$GENOME" genome/genome.fa
cp -- "$ANNOTATION" annotation/annotation.gtf
cd samples || exit 1
if [ "$SINGLE" -eq 1 ]
then
    # CHIPS/INPUTS are 0-based while replica numbers start at 1.
    for ((i = 1; i <= NUMREPLICAS; i++)); do
        mkdir -p "replica_$i/chip" "replica_$i/input" "replica_$i/replica_results"
        cp -- "${CHIPS[i-1]}" "replica_$i/chip/sample_chip_$i.fq.gz"
        cp -- "${INPUTS[i-1]}" "replica_$i/input/sample_input_$i.fq.gz"
    done
fi
echo "===================="
echo "WORKSPACE CREATED"
echo "===================="
# Build the bowtie2 index named "index" from genome.fa inside the genome
# directory (a sibling of the current directory), then list the size of the
# files found there.
echo "====================="
echo "Creating index"
echo "====================="
cd ../genome || exit 1
bowtie2-build genome.fa index
# du has to run as its own command: as extra arguments to echo it was only
# printed as text together with the file names.
echo "Files size:"
du -h -- *
# Submit one sample_proc.sh job per replica through sbatch. The jobs are
# submitted from the results directory; each one gets its own job name,
# output file (sample_proc_<n>) and error file (err_<n>).
echo "====================="
echo "Processing samples"
echo "====================="
cd ../results || exit 1
for ((i = 1; i <= NUMREPLICAS; i++)); do
    # Every argument is quoted so that an empty or space-containing value
    # cannot shift the positional parameters seen by sample_proc.sh.
    sbatch --job-name="proc_$i" --output="sample_proc_$i" --error="err_$i" \
        "$INSDIR/sample_proc.sh" "$WD" "$i" "$PEAK" "$NUMREPLICAS" "$INSDIR" \
        "$EXP" "$CHR" "$TSSUP" "$TSSDOWN" "$GENOME"
done