-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunall.sh
132 lines (121 loc) · 6.27 KB
/
runall.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Example of how to execute this script:
# bash runall.sh test_data/raw_pdb_files test_data/ras_reference_alignment.fa test_data/pdb_chain_pfam.csv PF00071 1g16

# Print the usage line and the description of the five positional arguments.
# The arguments are listed in the order in which the script reads them
# ($1 .. $5), which is also the order used in the example above.
print_usage() {
  printf 'Usage: bash %s PDB-files-directory reference-alignment SIFTS-file Pfam-domain-ID reference-structure-PDB-ID\n' "$0"
  cat <<'EOF'

positional arguments:
  PDB-files-directory         Directory with PDB-structures for calculation of
                              residue contact network.
  reference-alignment         Alignment of the sequences of the structures for which
                              residue contact networks were created. See the
                              documentation for information about the requirements
                              of such an alignment.
  SIFTS-file                  SIFTS-file "pdb_chain_pfam.csv" for finding chains
                              within the PDB-structures which contain the Pfam-
                              domain of interest.
  Pfam-domain-ID              Pfam domain of interest. Important for the program to
                              know which chains to analyse in case of complex
                              structures.
  reference-structure-PDB-ID  For all residues, the equivalent residues (PDB-numbering)
                              of the reference structure will be provided.
                              Just provide the PDB-ID of your favourite structure
                              of the dataset.
EOF
}

# An explicit help request is not an error: print the help and exit 0.
# "${1:-}" keeps the test well-formed when the script is called without arguments.
if [[ "${1:-}" == '-h' || "${1:-}" == '--help' ]]; then
  print_usage
  exit 0
fi

# Anything other than exactly five arguments is a usage error: report it on
# stderr and exit non-zero so that calling scripts can detect the failure.
if [[ $# -ne 5 ]]; then
  printf 'Error: expected 5 arguments, but got %d.\n\n' "$#" >&2
  print_usage >&2
  exit 1
fi
# Positional arguments, in the order in which they must be given on the command line.
RAW_PDB_FILES_DIR=$1         # e.g. path/to/my_pdb_files
REFERENCE_ALIGNMENT=$2       # e.g. path/to/my_alignment.fa
SIFTS_PDB_CHAIN_PFAM=$3      # file from https://www.ebi.ac.uk/pdbe/docs/sifts/quick.html, update if necessary
PFAM_DOMAIN_OF_INTEREST=$4   # e.g. PF00071
REFERENCE_STRUCTURE=$5       # e.g. 1g16

# Two residues are considered to form a contact if any two of their atoms are
# within this distance (in Angstrom) of each other.
# SET VALUE ACCORDING TO YOUR PREFERENCES
ATOMIC_DISTANCE_CUTOFF=5

# Echo the inputs back to the user. The expansions are quoted so that a value
# containing whitespace is printed once instead of re-applying the format
# string to every word.
printf 'INPUT DATA:\n'
printf 'PDB-files in directory %s\n' "$RAW_PDB_FILES_DIR"
printf 'reference alignment: %s\n' "$REFERENCE_ALIGNMENT"
printf 'reference structure: %s\n' "$REFERENCE_STRUCTURE"
printf 'Pfam domain of interest: %s\n' "$PFAM_DOMAIN_OF_INTEREST"
printf 'File for identification of PDB-chains with Pfam domain of interest: %s\n\n' "$SIFTS_PDB_CHAIN_PFAM"
# Step 0: validate the input data before anything is written.
# Only stderr of check_data.py is discarded (2> /dev/null); its stdout is
# still shown. The exit status is tested directly by the `if`.
# The path arguments are quoted so that paths containing spaces are passed
# as single arguments.
if ipython scripts/check_data.py "$RAW_PDB_FILES_DIR/" "$REFERENCE_ALIGNMENT" "$SIFTS_PDB_CHAIN_PFAM" 2> /dev/null; then
  printf 'Data-check: successful.\nStarting analysis...\n'
else
  printf '\nScript check_data.py has non-zero exit status (maybe an incorrect/missing argument?).\n'
  printf 'Runall script aborted.\nPlease check the input data and restart runall.sh.\n'
  exit 1
fi
# Make sure the pipeline starts with a fresh, empty output directory "results".
# If the directory already exists, the user has to confirm its removal.
if [ -d "results" ]; then
  printf '\nDirectory "%s" already exists. Do you want to remove its content and continue?\n' 'results'
  printf '(press "y" to continue or "n" to exit) '
  # -r keeps backslashes literal. On EOF or an empty answer CONTINUE stays
  # empty; the quoted comparison below then simply takes the "exit" branch
  # instead of producing a `[` syntax error.
  read -r CONTINUE
  if [ "$CONTINUE" = 'y' ]; then
    rm -rf -- results
  else
    printf 'Program exits.\nPlease move directory "%s" to a different location.\n' 'results'
    exit 1
  fi
fi

# Create the output directory; without it none of the later steps can write
# their results, so stop right away if it cannot be created.
if ! mkdir results; then
  printf '\nCould not create directory "%s".\nRunall script aborted.\n' 'results'
  exit 1
fi
# Step 1: prepare the PDB-files.
# process_pdb.py is given the raw PDB directory, the Pfam domain ID, the SIFTS
# file and results/processed_pdb_files (presumably the directory the processed
# files are written to; later steps read from it). stderr is discarded.
printf '\nPrepare PDB-files for residue contact calculation:\n'
printf '(extract only one chain, which contains the Pfam-domain of interest, from each input PDB-file and write it to a new PDB-file)\n'
if ipython scripts/process_pdb.py "$RAW_PDB_FILES_DIR" "$PFAM_DOMAIN_OF_INTEREST" "$SIFTS_PDB_CHAIN_PFAM" results/processed_pdb_files 2> /dev/null; then
  printf 'PDB-files prepared\n'
else
  printf '\nScript process_pdb.py has non-zero exit status (maybe an incorrect/missing argument?).\n'
  printf 'Runall script aborted.\nAfter the issue with process_pdb.py is fixed, please simply restart runall.sh.\n'
  exit 1
fi
# Step 2: calculate the residue contact networks.
# calculate_networks.py is given the directory of processed PDB-files and the
# atomic distance cutoff. Unlike the other steps, stderr is NOT discarded
# here, so messages of this script stay visible.
printf '\nCalculate residue contact networks:\n'
if ipython scripts/calculate_networks.py results/processed_pdb_files "$ATOMIC_DISTANCE_CUTOFF"; then
  printf 'Residue contact networks calculated and written to file.\n'
else
  printf '\nScript calculate_networks.py has non-zero exit status (maybe an incorrect/missing argument?).\n'
  printf 'Runall script aborted.\nAfter the issue with calculate_networks.py is fixed, please simply restart runall.sh.\n'
  exit 1
fi
# Step 3: map PDB-residue numbers to positions of the reference alignment.
# map_networks.py is given the directory of processed PDB-files, the reference
# alignment and the PDB-ID of the reference structure. stderr is discarded.
printf '\nMap PDB-residue numbers to reference alignment positions:\n'
if ipython scripts/map_networks.py results/processed_pdb_files "$REFERENCE_ALIGNMENT" "$REFERENCE_STRUCTURE" 2> /dev/null; then
  printf 'Residues mapped and mapping file written.\n'
else
  printf '\nScript map_networks.py has non-zero exit status (maybe an incorrect/missing argument?).\n'
  printf 'Runall script aborted.\nAfter the issue with map_networks.py is fixed, please simply restart runall.sh.\n'
  exit 1
fi
# Step 4: calculate the consensus network.
# The R script is given results/raw_networks.csv and results/mapping.csv
# (presumably written by the two preceding steps; verify against those
# scripts) and the reference alignment. stderr is discarded.
printf '\nCalculate consensus network using the single networks and the residue mapping file:\n'
if Rscript scripts/calculate_consensus_network.R results/raw_networks.csv results/mapping.csv "$REFERENCE_ALIGNMENT" 2> /dev/null; then
  printf 'Consensus network written to file.\n'
else
  printf '\nScript calculate_consensus_network.R has non-zero exit status (maybe an incorrect/missing argument?).\n'
  printf 'Runall script aborted.\nAfter the issue with calculate_consensus_network.R is fixed, please simply restart runall.sh.\n'
  exit 1
fi
# Step 5: knit the analysis report.
printf '\nAnalysing consensus residue contact network:\n'

# From here on the script works inside results/: analysis.Rmd is addressed as
# ../scripts/analysis.Rmd and the following steps use bare file names such as
# analysis.md. If the directory change fails, stop instead of running the
# remaining steps in the wrong directory.
if ! cd results; then
  printf '\nCould not change into directory "%s".\nRunall script aborted.\n' 'results'
  exit 1
fi

# stdout of knitr is discarded; stderr is kept so that R errors stay visible.
if Rscript -e "library(knitr); knit('../scripts/analysis.Rmd')" > /dev/null; then
  printf 'Markdown file created from script analysis.Rmd.\n'
else
  printf '\nScript analysis.Rmd has non-zero exit status (maybe knitr is missing in the library?).\n'
  printf 'Runall script aborted.\nAfter the issue with analysis.Rmd is fixed, please simply restart runall.sh.\n'
  exit 1
fi
# Step 6: convert analysis.md to analysis.html with the R package "markdown".
# The file names are relative to the current directory.
# The option is spelled 'use_xhtml' (it was misspelled as 'use_xhml').
# NOTE(review): 'use_xhtml' is the option name of the sundown-based markdown
# package (< 1.3); confirm against the installed version.
if Rscript -e "library(markdown); markdownToHTML('analysis.md', 'analysis.html', options=c('use_xhtml'))"; then
  printf '\nHTML-report created.\n'
else
  printf '\nCreation of HTML-report failed (maybe markdownToHTML is missing in library?).\n'
  printf 'Runall script aborted.\nAfter the issue is fixed, please simply restart runall.sh.\n'
  exit 1
fi
# Step 7: convert the HTML-report to PDF with pandoc.
# -s asks pandoc for a standalone document; input and output are addressed
# relative to the current directory.
if pandoc -s analysis.html -o analysis.pdf; then
  printf '\nPDF-report created.\n'
else
  printf '\nCreation of PDF-report failed (maybe pandoc not installed?).\n'
  printf 'Runall script aborted.\nAfter the issue is fixed, please simply restart runall.sh.\n'
  exit 1
fi