forked from dataprofessor/parp1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbackup_app_20Dec2022.txt
135 lines (105 loc) · 4.93 KB
/
backup_app_20Dec2022.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
import os
import pickle
import pandas as pd
from PIL import Image
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from padelpy import padeldescriptor
# Page configuration
# NOTE(review): the 'π' icon looks like the residue of a mis-decoded emoji —
# confirm the intended glyph against the deployed app.
_PAGE_SETTINGS = {
    'page_title': 'PARP1pred',
    'page_icon': 'π',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_SETTINGS)
# Session state: create the SMILES slot the first time a session runs the script.
if 'smiles_input' not in st.session_state:
    st.session_state['smiles_input'] = ''

# Remove the SMILES file written by an earlier run of this script, if present.
stale_smiles_file = 'molecule.smi'
if os.path.isfile(stale_smiles_file):
    os.remove(stale_smiles_file)

with st.sidebar:
    # NOTE(review): the leading 'π' looks like a mis-decoded emoji — confirm.
    st.title('π PARP1pred')
    # Input SMILES
    st.subheader('Input molecule')
def insert_example_smiles():
    """Store the built-in example SMILES string under ``smiles_input`` in session state."""
    example_smiles = 'O=C(c1cc(Cc2n[nH]c(=O)c3ccccc23)ccc1F)N1CCN(C(=O)C2CC2)CC1'
    st.session_state['smiles_input'] = example_smiles
def clear_smiles():
    """Reset ``smiles_input`` in session state to the empty string."""
    st.session_state['smiles_input'] = ''
# Bind the text box to the 'smiles_input' session-state key. Previously the
# session value was only passed as the widget's initial value, so a SMILES
# typed by the user never reached `st.session_state.smiles_input`, and every
# guard in this script that tests that key kept treating the input as empty.
# No `value=` is given: the key is already initialised in session state, and
# supplying both makes Streamlit warn about a conflicting default.
smiles_txt = st.sidebar.text_input('Enter SMILES notation', key='smiles_input')

# Both callbacks assign to the same key; Streamlit runs `on_click` callbacks
# before the rerun, so the text box picks up the new value.
st.sidebar.button('Example input', on_click=insert_example_smiles)
st.sidebar.button('Clear input', on_click=clear_smiles)
# Default page (shown while no SMILES is stored in session state);
# otherwise show the query molecule.
if st.session_state.smiles_input == '':
    st.subheader('Welcome to the PARP1pred app!')
    st.info('PARP1pred allow users to predict whether a query molecule is active/inactive towards the PARP1 target protein.')
    # `icon` must be a single emoji. The previous value ('π', apparently the
    # residue of a mis-decoded emoji) is not one, so Streamlit raised instead
    # of rendering the warning.
    st.warning('Enter SMILES notation in the sidebar to proceed', icon='👈')
    with st.expander('About this app'):
        st.markdown('''
#### What is PARP1 and its therapeutic importance
Poly (ADP-ribose) polymerase-1 (PARP-1) is an enzyme that catalyzes the ADP-ribosylation of a specific protein and plays a vital role in DNA repair. It has become an attractive target as inhibition of PARP-1 causes a toxic accumulation of DNA double strand breaks in cancer cells, particularly those with BRCA1/2 deficiency, which are found in breast, ovarian, prostate, and pancreatic cancers.
#### Dataset
In our work, we retrieved a human PARP-1 biological dataset from the ChEMBL database. The data was curated, and we received a non-redundant set of 2,018 PARP-1 inhibitors, which were divided into 1,720 active and 298 inactive compounds.
#### Model performance
We selected PubChem as a molecular fingerprint and used a random forest with an oversampling approach to construct the best model. The Matthews correlation coefficients in training, cross-validation, and test sets were 1.00, 0.96, and 0.74, respectively.
#### Python libraries
This app is based on the following Python libraries:
- `streamlit`
- `pandas`
- `rdkit`
- `padelpy`
#### Citing us
T. Lerksuthirat, S. Chitphuk, W. Stitchantrakul, D. Dejsuphong, A.A. Malik, C. Nantasenamat, PARP1PRED: A web server for screening the bioactivity of inhibitors against DNA repair enzyme PARP-1, ***EXCLI Journal*** (2023).
''')
    coverimage = Image.open('PARP1pred.jpg')
    st.image(coverimage)
else:
    # NOTE(review): the leading 'βοΈ' looks like a mis-decoded emoji — confirm.
    st.subheader('βοΈ Input molecule:')

    # Parse before rendering anything else: MolFromSmiles returns None for a
    # string it cannot parse, and passing None on to the drawing call crashes.
    # An empty molecule (blank text box) is rejected for the same reason.
    smi = Chem.MolFromSmiles(smiles_txt)
    if smi is None or smi.GetNumAtoms() == 0:
        st.error('Could not parse the SMILES string. Please check the input and try again.')
        # Halt this script run so nothing further down the script executes
        # for an unusable molecule.
        st.stop()

    with st.expander('Show SMILES'):
        st.text(smiles_txt)
    with st.expander('Show chemical structures'):
        # Render to a PNG on disk, then load it back for display.
        Draw.MolToFile(smi, 'molecule.png', width=900)
        mol_image = Image.open('molecule.png')
        st.image(mol_image)
# Input SMILES saved to file as a single "<SMILES><TAB>mol_001" record.
# The context manager closes the handle even if the write raises, which the
# previous manual open()/close() pair did not guarantee.
with open('molecule.smi', 'w') as smiles_file:
    smiles_file.write(f'{smiles_txt}\tmol_001')
# Compute PADEL descriptors
if st.session_state.smiles_input != '':
    # NOTE(review): the leading 'π’' looks like a mis-decoded emoji — confirm.
    st.subheader('π’ Descriptors')

    smiles_file = 'molecule.smi'
    descriptor_file = 'descriptors.csv'

    # Run PaDEL only when the SMILES file is actually on disk.
    if os.path.isfile(smiles_file):
        padel_options = {
            'descriptortypes': 'data/PubchemFingerprinter.xml',
            'detectaromaticity': True,
            'standardizenitro': True,
            'standardizetautomers': True,
            'threads': 2,
            'removesalt': True,
            'log': True,
            'fingerprints': True,
        }
        padeldescriptor(mol_dir=smiles_file, d_file=descriptor_file, **padel_options)

    # Load the descriptor table and discard the 'Name' column.
    descriptors = pd.read_csv(descriptor_file)
    descriptors.drop(columns='Name', inplace=True)

    with st.expander('Show full set of descriptors as calculated for query molecule'):
        st.write(descriptors)
        st.write(descriptors.shape)
# Load the trained model and keep only the descriptors it was fitted on
if st.session_state.smiles_input != '':
    # NOTE: unpickling executes code embedded in the file, so this path must
    # only ever point at a trusted model file.
    # The context manager closes the handle, which the previous bare
    # `pickle.load(open(...))` never did.
    with open('data/oversampling_PubChem_RandomForestClassifier.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    # Feature names recorded on the model when it was fitted.
    pubchem_subset = model.feature_names_in_

    # Select the columns by the model's own feature list so that they arrive
    # in the order the model was fitted with, and so that a descriptor
    # missing from the query table fails here with a KeyError instead of
    # producing a silently narrower table.
    query_desc_2 = descriptors[list(pubchem_subset)]

    with st.expander('Show subset of descriptors as used in trained model'):
        st.write(query_desc_2)
        st.write(query_desc_2.shape)
# Predict activity with the classification model loaded above
if st.session_state.smiles_input != '':
    # NOTE(review): the leading 'π€' looks like a mis-decoded emoji — confirm.
    st.subheader('π€ Predictions')

    # predict() returns an array of labels; take the first element explicitly
    # (assumes the single query molecule is the only row — TODO confirm).
    # Calling int() directly on a 1-D array is deprecated since NumPy 1.25.
    pred = int(model.predict(query_desc_2)[0])

    # The two labels are mutually exclusive, so test them as one chain.
    if pred == 0:
        st.error('Inactive')
    elif pred == 1:
        st.success('Active')