-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcorrelation.py
77 lines (61 loc) · 1.95 KB
/
correlation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
import functools as ft
# always flush print output
print = ft.partial(print, flush=True)
def corr_coeff(A,B = None):
# makes sure they are arrays and have the same shape
A = np.asarray(A)
if B is None:
B = np.asarray(A)
else:
B = np.asarray(B)
assert A.shape == B.shape
assert A.ndim <= 2
# shape of the reduced arrays
# flat_shape = list(A.shape)
# flat_shape[axis] = 1
# mean of input along axis 'axis' & subtract from input arrays themeselves
A_mA = A - A.mean(0)
B_mB = B - B.mean(0)
# calculate covariance
numerator = np.dot(A_mA.T,B_mB)
# axis = 0
# numerator = np.tensordot(A_mA,B_mB, axes=[(axis,), (axis,)])
# Sum of squares across rows
ssA = (A_mA**2).sum(0)
ssB = (B_mB**2).sum(0)
# calcualte sqrt of variances
denominator = np.sqrt(np.outer(ssA, ssB))
# Finally get corr coeff
return numerator / denominator
def thresholding_matrix(C, percentage, save_histo=False):
'''
In this function we will set a threshold
in the correlation matrix in order to keep
only the strongest links.
'''
C = np.nan_to_num(C)
C[np.diag_indices_from(C)] = 0.0
il = np.tril_indices(C.shape[0], k=-1)
# flat = C[il]
# hist, bins = np.histogram(flat,5000)
# ## print len(hist), hist
# ## print len(bins), bins
# h = cumulative_distribution(1.0*hist)
#
# v = (h>=1.0-percentage).nonzero()
# thr = bins[v[0][0]]
flat = np.sort(C[il])
del il
assert flat.ndim == 1, "something went wrong with flattening"
thr_index = int((1 - percentage) * flat.shape[0])
thr = flat[thr_index]
del flat
print("at", thr, end=" ... ")
# create weighted adjacency matrix
# mask = C>thr
# Cbin = np.zeros_like(C)
# Cbin[mask] = C[mask]
# create unweighted adjacency matrix
Cbin = (C > thr).astype(np.int8)
return Cbin