model.py
import numpy as np
class MLP:
    # initialize hyperparameters
def __init__(self, hidden_units, minibatch_size, regularization_rate, learning_rate):
self.hidden_units = hidden_units
self.minibatch_size = minibatch_size
self.regularization_rate = regularization_rate
self.learning_rate = learning_rate
    # ReLU (Rectified Linear Unit) activation, applied elementwise
    def relu_function(self, matrix_content, matrix_dim_x, matrix_dim_y):
        # vectorized max(0, x); the dimension arguments are kept only for
        # compatibility with the existing call sites
        return np.maximum(0, matrix_content)
    # gradient of the ReLU activation: 1 where the input is positive, else 0
    def grad_relu(self, matrix_content, matrix_dim_x, matrix_dim_y):
        # vectorized indicator (x > 0); the dimension arguments are kept only for
        # compatibility with the existing call sites
        return (matrix_content > 0).astype(float)
    # Softmax function; shifting by the maximum keeps the exponentials from
    # overflowing and leaves the result unchanged, since softmax(x) == softmax(x - c)
    def softmax_function(self, vector_content):
        exp_shifted = np.exp(vector_content - np.max(vector_content))
        return exp_shifted / np.sum(exp_shifted, axis=0)
    # generator that yields mini-batches of (inputs, targets)
    def iterate_minibatches(self, inputs, targets, batch_size, shuffle=False):
        assert inputs.shape[0] == targets.shape[0]  # inputs and targets must cover the same examples
        if shuffle:  # shuffle the example order before slicing batches
indices = np.arange(inputs.shape[0])
np.random.shuffle(indices)
for start_idx in range(0, inputs.shape[0] - batch_size + 1, batch_size):
if shuffle:
excerpt = indices[start_idx:start_idx + batch_size]
else:
excerpt = slice(start_idx, start_idx + batch_size)
yield inputs[excerpt], targets[excerpt]
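        # Note: the loop above stops at inputs.shape[0] - batch_size + 1, so any
        # trailing partial batch is dropped; train() below relies on this when it
        # assumes every batch holds exactly self.minibatch_size examples.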
    # function to train the MLP model
def train(self, trainX, trainY, epochs):
        # initialize weights with zero-mean Gaussians scaled by sqrt(2 / (fan_in + fan_out)), biases with zeros
w1_mat = np.random.randn(self.hidden_units, 28*28) *np.sqrt(2./(self.hidden_units+28*28))
w2_mat = np.random.randn(10, self.hidden_units) *np.sqrt(2./(10+self.hidden_units))
b1_vec = np.zeros((self.hidden_units, 1))
b2_vec = np.zeros((10, 1))
        # reshape inputs to (N, 784) and labels to (N, 1)
trainX = np.reshape(trainX, (trainX.shape[0], 28*28))
trainY = np.reshape(trainY, (trainY.shape[0], 1))
        for num_epochs in range(epochs):
if num_epochs % 2 == 0:
print("Current epoch number : ", num_epochs)
for batch in self.iterate_minibatches(trainX, trainY, self.minibatch_size, shuffle=True):
x_batch, y_batch = batch
x_batch = x_batch.T
y_batch = y_batch.T
                # hidden-layer pre-activation, then ReLU
z1 = np.dot(w1_mat, x_batch) + b1_vec
a1 = self.relu_function(z1, self.hidden_units, self.minibatch_size)
                # output-layer logits, then softmax
z2 = np.dot(w2_mat, a1) + b2_vec
a2_softmax = self.softmax_function(z2)
                # one-hot encode the ground-truth labels (targets of the cross-entropy loss)
                gt_vector = np.zeros((10, self.minibatch_size))
                for example_num in range(self.minibatch_size):
                    gt_vector[int(y_batch[0, example_num]), example_num] = 1
                # L2 regularization contributes lambda * W to each weight gradient
                d_w2_mat = self.regularization_rate * w2_mat
                d_w1_mat = self.regularization_rate * w1_mat
                # backpropagation
                # for softmax + cross-entropy, dL/dz2 reduces to (softmax output - one-hot target)
                delta_2 = a2_softmax - gt_vector
                d_w2_mat = d_w2_mat + np.dot(delta_2, a1.T)
                d_b2_vec = np.sum(delta_2, axis=1, keepdims=True)
                # propagate through w2 and gate by the ReLU derivative
                delta_1 = np.dot(w2_mat.T, delta_2) * self.grad_relu(z1, self.hidden_units, self.minibatch_size)
                d_w1_mat = d_w1_mat + np.dot(delta_1, x_batch.T)
                d_b1_vec = np.sum(delta_1, axis=1, keepdims=True)
                # average the gradients over the mini-batch
                d_w2_mat = d_w2_mat / self.minibatch_size
                d_w1_mat = d_w1_mat / self.minibatch_size
                d_b2_vec = d_b2_vec / self.minibatch_size
                d_b1_vec = d_b1_vec / self.minibatch_size
                # gradient-descent update of the parameters
                w2_mat = w2_mat - self.learning_rate * d_w2_mat
                b2_vec = b2_vec - self.learning_rate * d_b2_vec
                w1_mat = w1_mat - self.learning_rate * d_w1_mat
                b1_vec = b1_vec - self.learning_rate * d_b1_vec
self.w1_mat, self.b1_vec, self.w2_mat, self.b2_vec = w1_mat, b1_vec, w2_mat, b2_vec
# function to test the MLP model
def test(self, testX):
output_labels = np.zeros(testX.shape[0])
num_examples = testX.shape[0]
testX = np.reshape(testX, (num_examples, 28*28))
testX = testX.T
# test with trained model
z1 = np.dot(self.w1_mat, testX) + self.b1_vec
a1 = self.relu_function(z1, self.hidden_units, num_examples)
z2 = np.dot(self.w2_mat, a1) + self.b2_vec
a2_softmax = self.softmax_function(z2)
        # predicted label = index of the largest softmax probability per column
        for i in range(num_examples):
            output_labels[i] = np.argmax(a2_softmax[:, i])
return output_labels
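
# A minimal usage sketch under assumed conditions: MNIST-shaped inputs of shape
# (N, 28, 28) with integer labels in 0..9. The random stand-in data and the
# hyperparameter values below are illustrative assumptions, not settings from
# this repository.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    trainX = rng.random((256, 28, 28))           # stand-in images
    trainY = rng.integers(0, 10, size=(256, 1))  # stand-in labels
    model = MLP(hidden_units=64, minibatch_size=32,
                regularization_rate=0.001, learning_rate=0.1)
    model.train(trainX, trainY, epochs=4)
    predictions = model.test(trainX[:32])
    print("predicted labels:", predictions)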