nnFunctions.py
import numpy as np
from scipy.optimize import minimize
from math import sqrt
import pickle
# %%
'''
You need to modify all of the functions below except initializeWeights().
'''
def initializeWeights(n_in, n_out):
    '''
    initializeWeights returns random initial weights for the neural network
    given the number of nodes in the input layer and the output layer.
    Input:
        n_in: number of nodes in the input layer
        n_out: number of nodes in the output layer
    Output:
        W: matrix of random initial weights with size (n_out x (n_in + 1))
    '''
    np.random.rand(10**3)  # advance the RNG state (Report 1.1)
    epsilon = sqrt(6) / sqrt(n_in + n_out + 1)
    W = (np.random.rand(n_out, n_in + 1) * 2 * epsilon) - epsilon
    return W
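
# A minimal sanity check of initializeWeights (the layer sizes below are
# illustrative assumptions, not values fixed by the assignment): every weight
# should fall inside (-epsilon, epsilon), with one extra column for the bias.
if __name__ == '__main__':
    W_demo = initializeWeights(784, 50)
    assert W_demo.shape == (50, 784 + 1)
    assert np.abs(W_demo).max() <= sqrt(6) / sqrt(784 + 50 + 1)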
# %%
def sigmoid(z):
    '''
    Return the sigmoid of input z (same dimensions as z).
    Notice that z can be a scalar, a vector or a matrix.
    '''
    sig = 1.0 / (1.0 + np.exp(-1.0 * z))
    return sig
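
# A quick check of sigmoid on a scalar and on an array: sigmoid(0) is exactly
# 0.5, and large positive/negative inputs saturate toward 0 and 1.
if __name__ == '__main__':
    assert sigmoid(0) == 0.5
    z_demo = np.array([-10.0, 0.0, 10.0])
    s_demo = sigmoid(z_demo)
    assert s_demo.shape == z_demo.shape
    assert s_demo[0] < 1e-4 and s_demo[2] > 1 - 1e-4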
# %%
def nnObjFunction(params, *args):
    '''
    % nnObjFunction computes the value of the objective function (cross-entropy
    % with regularization) given the weights, the training data and lambda
    % (the regularization hyper-parameter).
    % Input:
    % params: vector of weights of 2 matrices W1 (weights of connections from
    %     input layer to hidden layer) and W2 (weights of connections from
    %     hidden layer to output layer) where all of the weights are contained
    %     in a single vector.
    % n_input: number of nodes in input layer (not including the bias node)
    % n_hidden: number of nodes in hidden layer (not including the bias node)
    % n_class: number of nodes in output layer (number of classes in the
    %     classification problem)
    % train_data: matrix of training data. Each row of this matrix
    %     represents the feature vector of the corresponding instance.
    % train_label: the vector of true labels of training instances. Each entry
    %     in the vector represents the true label of its corresponding training instance.
    % lambda: regularization hyper-parameter, used to control overfitting.
    % Output:
    % obj_val: a scalar value representing the value of the error function
    % obj_grad: a SINGLE vector (not a matrix) of gradient values of the error function
    % NOTE: how to compute obj_grad
    % Use the backpropagation algorithm to compute the gradient of the error
    % function for each weight in the weight matrices.
    '''
    # do not remove the next four lines
    n_input, n_hidden, n_class, train_data, train_label, lambdaval = args
    # First reshape 'params' vector into 2 matrices of weights W1 and W2
    W1 = params[0:n_hidden * (n_input + 1)].reshape((n_hidden, (n_input + 1)))
    W2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))
    # One-hot encoding of train_label, e.g. 25000 x 10 for a 10-class problem
    y_l = (np.arange(n_class) == train_label[:, None]).astype(np.int64)
    train_label = y_l
    # Feed-forward propagation
    bias_hidden = np.ones(train_data.shape[0])  # column of ones for the bias
    train_data = np.column_stack([train_data, bias_hidden])  # e.g. 25000 x 785
    sum_hidden = np.dot(train_data, W1.T)  # net input at hidden layer, e.g. 25000 x 50
    output_hidden = sigmoid(sum_hidden)  # output at hidden layer, e.g. 25000 x 50
    out_bias = np.ones(output_hidden.shape[0])  # column of ones for the bias
    output_hidden_new = np.column_stack([output_hidden, out_bias])  # e.g. 25000 x 51
    sum_out = np.dot(output_hidden_new, W2.T)  # net input at output layer, e.g. 25000 x 10
    o_l = sigmoid(sum_out)  # output layer activations, e.g. 25000 x 10
    # Backpropagation
    # gradient of W2
    delta_o = o_l - train_label  # output error, e.g. 25000 x 10
    grad_W2 = np.dot(output_hidden_new.T, delta_o)  # e.g. 51 x 10
    # gradient of W1
    W1_1 = (1 - output_hidden_new) * output_hidden_new  # sigmoid derivative, e.g. 25000 x 51
    W1_2 = np.dot(delta_o, W2)  # error propagated back through W2, e.g. 25000 x 51
    W1_tot = W1_1 * W1_2  # hidden-layer error, e.g. 25000 x 51
    grad_W1 = np.dot(train_data.T, W1_tot)  # e.g. 785 x 51
    grad_W1 = grad_W1[:, 0:n_hidden]  # drop the bias column, e.g. 785 x 50
    # Loss function calculation
    N = train_data.shape[0]  # number of training instances, e.g. 25000
    loss_1 = y_l * np.log(o_l)  # e.g. 25000 x 10
    loss_2 = (1 - y_l) * np.log(1 - o_l)  # e.g. 25000 x 10
    loss_function = -np.mean(np.sum(loss_1 + loss_2, axis=1))  # eq. 7, scalar
    # Regularization calculation
    reg_weights = np.sum(np.square(W1)) + np.sum(np.square(W2))  # scalar
    L2_reg = (lambdaval / (2 * N)) * reg_weights  # scalar
    obj_val = loss_function + L2_reg  # eq. 15, scalar
    # Regularize the gradients
    grad_W1_new = (grad_W1.T + lambdaval * W1) / N  # e.g. 50 x 785
    grad_W2_new = (grad_W2.T + lambdaval * W2) / N  # e.g. 10 x 51
    obj_grad = np.concatenate((grad_W1_new.ravel(), grad_W2_new.ravel()), 0)  # e.g. (39760,)
    return (obj_val, obj_grad)
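
# A hedged sketch of how nnObjFunction plugs into scipy.optimize.minimize.
# The tiny synthetic dataset and the hyper-parameters below (n_hidden,
# lambdaval, the 'CG' method, maxiter) are illustrative assumptions, not
# values fixed by the assignment; jac=True tells minimize that the objective
# returns the pair (value, gradient).
if __name__ == '__main__':
    n_input_d, n_hidden_d, n_class_d = 5, 4, 3
    data_d = np.random.rand(20, n_input_d)         # 20 synthetic instances
    label_d = np.random.randint(0, n_class_d, 20)  # synthetic labels
    init_W1 = initializeWeights(n_input_d, n_hidden_d)
    init_W2 = initializeWeights(n_hidden_d, n_class_d)
    init_params = np.concatenate((init_W1.ravel(), init_W2.ravel()))
    demo_args = (n_input_d, n_hidden_d, n_class_d, data_d, label_d, 0.1)
    res = minimize(nnObjFunction, init_params, jac=True, args=demo_args,
                   method='CG', options={'maxiter': 50})
    # Unpack the optimized vector back into W1 and W2, mirroring nnObjFunction
    W1_d = res.x[0:n_hidden_d * (n_input_d + 1)].reshape((n_hidden_d, n_input_d + 1))
    W2_d = res.x[n_hidden_d * (n_input_d + 1):].reshape((n_class_d, n_hidden_d + 1))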
# %%
def nnPredict(W1, W2, data):
    '''
    % nnPredict predicts the label of data given the parameters W1 and W2 of
    % the neural network.
    % Input:
    % W1: matrix of weights for hidden layer units
    % W2: matrix of weights for output layer units
    % data: matrix of data. Each row of this matrix represents the feature vector for the corresponding data instance
    % Output:
    % label: a column vector of predicted labels
    '''
    new_bias_H = np.ones(data.shape[0])  # column of ones for the bias
    data = np.hstack([data, new_bias_H[:, None]])  # e.g. 25000 x 785
    sum_hidden = np.dot(data, W1.T)  # net input at hidden layer, e.g. 25000 x 50
    output_hidden = sigmoid(sum_hidden)  # output at hidden layer, e.g. 25000 x 50
    new_bias_O = np.ones(output_hidden.shape[0])  # column of ones for the bias
    out_hidden = np.hstack([output_hidden, new_bias_O[:, None]])  # e.g. 25000 x 51
    o_l = sigmoid(np.dot(out_hidden, W2.T))  # output layer activations, e.g. 25000 x 10
    labels = np.argmax(o_l, axis=1)  # most probable class per instance
    return labels
# %%
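# A small self-contained sketch of nnPredict (sizes are again illustrative):
# the result should be one label per row of the input, in the range
# 0..n_class-1; accuracy against known labels would then be
# np.mean(predicted == true_labels).
if __name__ == '__main__':
    W1_p = initializeWeights(5, 4)
    W2_p = initializeWeights(4, 3)
    preds = nnPredict(W1_p, W2_p, np.random.rand(10, 5))
    assert preds.shape == (10,)
    assert preds.min() >= 0 and preds.max() < 3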