-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
84 lines (72 loc) · 3.24 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Data utilities modified from SR-GNN:
Session-based Recommendation with Graph Neural Networks
(https://arxiv.org/abs/1811.00855).
"""
import numpy as np
def data_masks(all_usr_pois, item_tail):
    """Pad every session to a common length and build matching 0/1 masks.

    Args:
        all_usr_pois: Sequence of sessions, each a list of item ids.
        item_tail: List that is repeated to fill the missing positions of
            shorter sessions. It is repeated once per missing position, so
            it should hold exactly one padding id (e.g. ``[0]``).

    Returns:
        A tuple ``(us_pois, us_msks, len_max)`` where ``us_pois`` holds the
        padded sessions, ``us_msks`` holds 1 for real positions and 0 for
        padded ones, and ``len_max`` is the length of the longest session
        (0 when ``all_usr_pois`` is empty).
    """
    us_lens = [len(upois) for upois in all_usr_pois]
    # default=0 keeps an empty dataset from raising ValueError inside max().
    len_max = max(us_lens, default=0)
    us_pois = [
        upois + item_tail * (len_max - le)
        for upois, le in zip(all_usr_pois, us_lens)
    ]
    us_msks = [[1] * le + [0] * (len_max - le) for le in us_lens]
    return us_pois, us_msks, len_max
def split_validation(train_set, valid_portion):
    """Randomly split a dataset into a training part and a validation part.

    Args:
        train_set: Pair ``(samples, labels)`` of equally indexed sequences.
        valid_portion: Fraction of the samples assigned to the validation
            part; the training size is ``round(n * (1 - valid_portion))``.

    Returns:
        ``((train_x, train_y), (valid_x, valid_y))`` as plain lists, in the
        order given by one ``np.random.shuffle`` of the sample indices.
    """
    samples, labels = train_set
    total = len(samples)

    # Shuffle the index vector once; both parts are cut from the same order
    # so samples and labels stay aligned.
    order = np.arange(total, dtype='int32')
    np.random.shuffle(order)

    cut = int(np.round(total * (1. - valid_portion)))
    train_idx = order[:cut]
    valid_idx = order[cut:]

    def _gather(seq, positions):
        return [seq[pos] for pos in positions]

    valid_part = (_gather(samples, valid_idx), _gather(labels, valid_idx))
    train_part = (_gather(samples, train_idx), _gather(labels, train_idx))
    return train_part, valid_part
class Data():
    """Padded session dataset that yields batch indices and session graphs."""

    def __init__(self, data, shuffle=False):
        """Pad the sessions in ``data`` and store them as NumPy arrays.

        Args:
            data: Indexable pair; ``data[0]`` is the list of sessions (lists
                of item ids) and ``data[1]`` the per-session targets.
            shuffle: When true, ``generate_batch`` reshuffles the samples on
                every call.
        """
        # Item id 0 is used as the padding value.
        inputs, mask, len_max = data_masks(data[0], [0])
        self.inputs = np.asarray(inputs)    # padded sessions
        self.mask = np.asarray(mask)        # 1 = real item, 0 = padding
        self.len_max = len_max              # padded session length
        self.targets = np.asarray(data[1])
        self.length = len(inputs)           # number of sessions
        self.shuffle = shuffle

    def generate_batch(self, batch_size):
        """Return a list of index arrays, one per batch.

        Every batch holds exactly ``batch_size`` indices. The last batch is
        always the final ``batch_size`` samples, so it overlaps the previous
        batch when ``length`` is not a multiple of ``batch_size``. When the
        dataset is smaller than one batch, indices wrap around so samples
        are repeated.

        Args:
            batch_size: Positive number of samples per batch.

        Returns:
            List of 1-D integer arrays; empty when the dataset is empty.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if self.length == 0:
            return []

        if self.shuffle:
            # Apply one permutation to all arrays so they stay aligned.
            order = np.arange(self.length)
            np.random.shuffle(order)
            self.inputs = self.inputs[order]
            self.mask = self.mask[order]
            self.targets = self.targets[order]

        n_batch, remainder = divmod(self.length, batch_size)
        if remainder:
            n_batch += 1
        slices = np.split(np.arange(n_batch * batch_size), n_batch)

        last = np.arange(self.length - batch_size, self.length)
        if self.length < batch_size:
            # Negative indices below -length would be out of bounds; wrapping
            # modulo length selects the same samples wherever plain negative
            # indexing was valid and stays in range otherwise.
            last %= self.length
        slices[-1] = last
        return slices

    def get_slice(self, index):
        """Build the per-session graph inputs for the samples at ``index``.

        Args:
            index: Integer index array selecting rows of ``self.inputs``.

        Returns:
            A tuple ``(A_in, A_out, alias_inputs, items, mask, targets)``:
            ``A_in`` / ``A_out`` are lists of square matrices (one per
            session) holding the adjacency matrix normalised by in-degree,
            respectively its transpose normalised by out-degree, each with
            the identity added; ``alias_inputs`` maps every session position
            to the row of its item in ``items``; ``items`` lists each
            session's sorted unique item ids, zero-padded to the largest
            node count in the batch; ``mask`` and ``targets`` are the
            selected rows of ``self.mask`` and ``self.targets``.

        Raises:
            ValueError: If ``index`` selects no sessions.
        """
        sessions = self.inputs[index]

        # One np.unique call per session yields both the node list and the
        # position -> node-row mapping, so no per-item searches are needed.
        graphs = []
        for session in sessions:
            node, alias = np.unique(session, return_inverse=True)
            graphs.append((session, node, alias))
        max_n_node = max(len(node) for _, node, _ in graphs)

        items, A_in, A_out, alias_inputs = [], [], [], []
        for session, node, alias in graphs:
            items.append(node.tolist() + [0] * (max_n_node - len(node)))

            # Edges link consecutive items and stop at the first padding (0)
            # successor.
            successors = session[1:]
            padding = np.flatnonzero(successors == 0)
            n_edges = padding[0] if padding.size else len(successors)
            u_A = np.zeros((max_n_node, max_n_node))
            u_A[alias[:n_edges], alias[1:n_edges + 1]] = 1

            # Replace zero degrees by 1 to avoid division by zero.
            u_sum_in = np.sum(u_A, 0)
            u_sum_in[u_sum_in == 0] = 1
            u_A_in = np.divide(u_A, u_sum_in) + np.eye(max_n_node)

            u_sum_out = np.sum(u_A, 1)
            u_sum_out[u_sum_out == 0] = 1
            u_A_out = np.divide(u_A.transpose(), u_sum_out) + np.eye(max_n_node)

            A_in.append(u_A_in)
            A_out.append(u_A_out)
            alias_inputs.append(alias.tolist())
        return A_in, A_out, alias_inputs, items, self.mask[index], self.targets[index]