-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: merge_model_xmls.py
96 lines (88 loc) · 3.85 KB
/
merge_model_xmls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "pandas",
# ]
# ///
import math
import os
import pandas as pd
import sys
import traceback
def dump_xml(df_out, name, header, dirname):
    """Serialise *df_out* as a model XML file named ``<name>.xml`` in *dirname*.

    Replaces the 'analyte name' column with synthetic sequential labels
    (SC0001, SC0002, ...) before writing.  The caller-supplied *header*
    (presumably the original file's XML preamble up to the ``<model>`` tag)
    is prepended and the document is closed with ``</ModelData>``.
    """
    # Synthetic analyte names keep entries unique in the merged output.
    df_out['analyte name'] = ['SC{:04d}'.format(idx) for idx in range(1, len(df_out) + 1)]
    body = (df_out[colnames]
            .rename(columns={'analyte name': 'name'})
            .to_xml(index=False, row_name='analyte', root_name='model',
                    attr_cols=['name'] + colnames[1:]))
    # Drop the first 46 characters pandas emits (assumed to be the XML
    # declaration plus the opening <model> tag -- the header already
    # provides both), then close the outer ModelData element.
    document = header + body[46:] + '\n</ModelData>'
    with open(os.path.join(dirname, f'{name}.xml'), 'w+') as handle:
        handle.write(document)
# Column order for serialised analyte attributes; 'analyte name' is renamed
# to 'name' when writing XML (see dump_xml / merge_models).
colnames = ['analyte name', 'mw', 's', 'D', 'f', 'f_f0', 'vbar20', 'extinction', 'axial', 'sigma', 'delta',
            'oligomer', 'shape', 'type', 'molar', 'signal']
# How analytes sharing (s, f_f0, vbar20) are combined: mean for the physical
# properties, sum for the signal amplitude (normalised later by model count).
aggregations = {col: 'mean' for col in
                ('mw', 'D', 'f', 'extinction', 'axial', 'sigma', 'delta', 'oligomer', 'shape', 'type', 'molar')}
aggregations['signal'] = 'sum'
# NOTE(review): module-level accumulator used by merge_models(); calling
# merge_models() twice in one process would merge stale frames -- confirm
# whether this was intended to be function-local.
dfs = []
def merge_models(dir_input, file_save=None):
    """Merge all model XML files found in *dir_input* into one merged model.

    Every ``*.xml`` file in the directory (resolved relative to this script's
    location; absolute paths pass through unchanged) is read, all analytes
    are concatenated, analytes sharing (s, f_f0, vbar20) are averaged (their
    signals summed and normalised by the number of models), and two outputs
    are written: a merged model XML and a tab-separated c(s,ff0)
    distribution file next to it.

    Parameters
    ----------
    dir_input : str
        Directory containing the model XML files.
    file_save : str, optional
        Output path for the merged XML.  Defaults to
        ``merged_<sanitised dir name>.xml`` inside *dir_input*.
    """
    # Local copies of the serialisation schema so this function is
    # self-contained and re-entrant (the original accumulated into a
    # module-level ``dfs`` list, which would merge stale data on a
    # second call in the same process).
    colnames = ['analyte name', 'mw', 's', 'D', 'f', 'f_f0', 'vbar20', 'extinction',
                'axial', 'sigma', 'delta', 'oligomer', 'shape', 'type', 'molar', 'signal']
    aggregations = {col: 'mean' for col in
                    ('mw', 'D', 'f', 'extinction', 'axial', 'sigma', 'delta',
                     'oligomer', 'shape', 'type', 'molar')}
    aggregations['signal'] = 'sum'

    base_dir = os.path.dirname(__file__)  # avoid shadowing builtin ``dir``
    dirname = os.path.join(base_dir, dir_input)
    dfs = []        # one analyte DataFrame per input file
    metadata = []   # one <model> attribute row per input file (for variance)
    header = ''
    print(f'{len(os.listdir(dirname))} files found, start reading')
    for filename in os.listdir(dirname):
        if not filename.endswith('.xml'):
            continue
        path = os.path.join(dirname, filename)
        # Keep the first file's 4-line preamble (declaration through the
        # opening <model ...> tag) as the header of the merged output.
        if not header:
            with open(path) as infile:
                header = '\n'.join(infile.readlines()[:4])
        # Analyte rows.
        try:
            dfs.append(pd.read_xml(path, xpath='//ModelData/model/analyte'))
        except Exception:
            traceback.print_exc()
            raise
        # Model-level attributes (variance etc.).
        try:
            metadata.append(pd.read_xml(path, xpath='//ModelData/model'))
        except Exception:
            traceback.print_exc()
            raise
    metadata = pd.concat(metadata, axis=0)
    max_var = metadata.variance.max()
    df = pd.concat(dfs, axis=0)
    print('finished importing data. starting to find unique analytes')
    # Analytes with identical (s, f_f0, vbar20) are treated as the same
    # species: average their properties, sum then normalise the signal.
    df_out = df.groupby(by=['s', 'f_f0', 'vbar20'], sort=False).agg(aggregations).reset_index(drop=False)
    df_out['signal'] = df_out['signal'] / metadata.shape[0]
    df_out['analyte name'] = ['SC{:04d}'.format(i) for i in range(1, df_out.shape[0] + 1)]
    # Patch the header's variance attribute with the largest input variance.
    head, _, tail = header.partition('variance="')
    header = head + f'variance="{max_var}' + tail[tail.index('"'):]
    print('start dumping to xml')
    xml = df_out[colnames].rename({'analyte name': 'name'}, axis=1) \
        .to_xml(index=False, row_name='analyte', root_name='model', attr_cols=['name'] + colnames[1:])
    # Drop the first 46 characters pandas emits (assumed to be the XML
    # declaration plus the opening <model> tag -- the preserved header
    # already provides both), then close the outer ModelData element.
    xml = header + xml[46:] + '\n</ModelData>'
    if not file_save:
        # Fix: the sanitised directory name was computed but never used and
        # the output was written under a broken literal name; use it as the
        # default output file name.
        safe_name = dir_input.replace("\\", "").replace("/", "").replace(".", "")
        file_save = os.path.join(dirname, f'merged_{safe_name}.xml')
    with open(file_save, 'w+') as outfile:
        outfile.write(xml)
    print('starting c(s,ff0) file generation')
    df_out['s'] *= 1e13  # sedimentation coefficient in Svedberg units
    lm_viscosity = 0.01002 * 0.1  # D2O viscosity -- presumably Pa*s; TODO confirm
    lm_density = 0.99832 * 1000   # D2O density (unused; kept for reference)
    # Stokes-Einstein hydrodynamic radius: r_h = k_B * T / (6 * pi * eta * D).
    df_out['r_h'] = 1 / df_out.D / 1e-4 * 1.38065e-23 * 293.15 / 6 / lm_viscosity / math.pi
    # Fix: the original str.replace('xml', ...) matched anywhere in the path
    # and left the dot in place; strip the extension properly instead.
    dat_path = os.path.splitext(str(file_save))[0] + '-c(s_ff0).dat'
    df_out.to_csv(dat_path, index=False, header=False,
                  columns=['s', 'mw', 'f_f0', 'D', 'r_h', 'signal'], sep='\t')
    print('finished')
if __name__ == '__main__':
    # Take the target directory from argv when given, otherwise prompt.
    if len(sys.argv) == 2:
        target_dir = sys.argv[1]
    else:
        target_dir = input('Enter path to directory which should be condensed:')
    merge_models(target_dir)