-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathmake_chain_groups.py
executable file
·80 lines (71 loc) · 3.72 KB
/
make_chain_groups.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
#
# make_chain_groups.py v1 2019-02-14
'''make_chain_groups.py last modified 2019-09-25
generate a PyMOL script to make selection groups for each chain
make_chain_groups.py -p 5ara.pdb > 5ara_chains.pml
run script in the PyMOL console as:
@5ara_chains.pml
'''
import sys
import argparse
# DBREF specs from:
# http://www.wwpdb.org/documentation/file-format-content/format33/sect3.html#DBREF
#COLUMNS DATA TYPE FIELD DEFINITION
#-----------------------------------------------------------------------------------
# 1 - 6 Record name "DBREF "
# 8 - 11 IDcode idCode ID code of this entry.
#13 Character chainID Chain identifier.
#15 - 18 Integer seqBegin Initial sequence number of the
# PDB sequence segment.
#19 AChar insertBegin Initial insertion code of the
# PDB sequence segment.
#21 - 24 Integer seqEnd Ending sequence number of the
# PDB sequence segment.
#25 AChar insertEnd Ending insertion code of the
# PDB sequence segment.
#27 - 32 LString database Sequence database name.
#34 - 41 LString dbAccession Sequence database accession code.
#43 - 54 LString dbIdCode Sequence database identification code.
#56 - 60 Integer dbseqBegin Initial sequence number of the
# database segment.
#61 AChar idbnsBeg Insertion code of initial residue of the
# segment, if PDB is the reference.
#63 - 67 Integer dbseqEnd Ending sequence number of the
# database segment.
#68 AChar dbinsEnd Insertion code of the ending residue of
# the segment, if PDB is the reference.
def make_chain_select_commands(pdbfile, wayout):
    '''Write one PyMOL 'select' command for each chain named in a DBREF record.

    The PDB-format file is read line by line. For every DBREF record the
    chain identifier (column 13) is mapped to the database identification
    code (dbIdCode, columns 43-54); a later DBREF record for the same chain
    replaces an earlier one. Chain identifiers from ATOM records (column 22)
    are collected only for the diagnostic message described below.

    pdbfile -- path of the PDB-format file to read
    wayout  -- writable text stream that receives the commands, one line per
               chain in sorted order: 'select CHAIN__DBID, chain CHAIN'

    Progress messages and warnings are written to sys.stderr. When the file
    has no DBREF record, nothing is written to wayout; a warning is printed
    instead, followed by the chains that had ATOM records, if any.

    Returns None.
    '''
    keepchains = {}  # key is chain ID, value is dbIdCode from DBREF
    atomchains = set()  # chain IDs that have at least one ATOM record
    sys.stderr.write("# Reading chain info from PDB {}\n".format(pdbfile))
    # 'with' closes the file even if a malformed line raises part-way through
    with open(pdbfile, 'r') as pdbhandle:
        for line in pdbhandle:
            record = line[0:6].strip()
            # get relevant chains that match the sequence, in case of hetero multimers
            if record == "DBREF":
                # stop at column 54: column 56 already belongs to dbseqBegin,
                # so a 5-digit start position would end up in the name
                keepchains[line[12]] = line[42:54].strip()
            elif record == "ATOM":
                atomchains.add(line[21])
    if keepchains:
        for chain in sorted(keepchains):
            wayout.write("select {}__{}, chain {}\n".format(chain, keepchains[chain], chain))
        sys.stderr.write("# wrote 'select' commands for {} chains\n".format(len(keepchains)))
    else:
        sys.stderr.write("WARNING: NO DBREF RECORDS FOUND IN {}\n".format(pdbfile))
        if atomchains:
            sys.stderr.write("ATOMS WERE FOUND FOR THE FOLLOWING CHAINS:\n{}\n".format(",".join(sorted(atomchains))))
    # no return
def main(argv, wayout):
    '''Parse the command-line options and write the PyMOL selection script.

    argv   -- list of command-line arguments, without the program name;
              when it is empty the help text is shown, as if '-h' was given
    wayout -- writable text stream handed on to make_chain_select_commands
    '''
    # substitute '-h' locally instead of appending to the caller's list
    if not argv:
        argv = ['-h']
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__)
    parser.add_argument("-p", "--pdb", help="PDB format file", required=True)
    args = parser.parse_args(argv)
    # make PyMOL script with one 'select' command per chain
    make_chain_select_commands(args.pdb, wayout)
# when run as a script: pass the command-line arguments (without the program
# name) to main() and send the generated commands to standard output
if __name__ == "__main__":
    main(sys.argv[1:], sys.stdout)