In [None]:
import numpy as np
import matplotlib.pyplot as plt

def myRelu(z):
    """Rectified linear unit: clamp every entry of ``z`` from below at zero."""
    return np.clip(z, 0, np.inf)
def dMyRelu(z):
    """Jacobian of ReLU at ``z``: a diagonal matrix holding 1.0 where z > 0, else 0.0."""
    active = (z > 0).astype(float)
    return np.diag(active)

def mySigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied elementwise.

    The value is evaluated through ``e = exp(-|z|)``, which never exceeds 1.
    The direct formula computes ``exp(-z)``, which overflows for large
    negative ``z`` and makes NumPy emit a RuntimeWarning; this form does not.

    Parameters
    ----------
    z : scalar or numpy array
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``z``, with values in [0, 1].
    """
    e = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))       (same expression as the direct formula)
    # z <  0:  exp(z) / (1 + exp(z))   (algebraically equal, but cannot overflow)
    numerator = np.where(z >= 0, 1.0, e)
    return numerator / (1.0 + e)
def dMySigmoid(z):
    """Jacobian of the elementwise sigmoid at ``z``: diag(s * (1 - s)), s = mySigmoid(z)."""
    s = mySigmoid(z)
    slope = s*(1.-s)
    return np.diag(slope)

In [None]:
# Sanity check: evaluate each activation and its Jacobian on one random vector
# and print them (ReLU first, then sigmoid).
z = np.random.normal(0,1,5)
for activation, jacobian in ((myRelu, dMyRelu), (mySigmoid, dMySigmoid)):
    x = activation(z)
    J = jacobian(z)
    print("z = {},\nx = {},\nJ = {}".format(z, x, J))

In [None]:
# Layer widths, input first and output last.
h = [2, 10, 10, 2]
# Number of weight matrices (one per pair of consecutive layers).
depth = len(h) - 1
# W[k] has shape (h[k+1], h[k]); entries are drawn uniformly from [0, 1).
W = [np.random.rand(n_out, n_in) for n_in, n_out in zip(h[:-1], h[1:])]

def f(x):
    """Forward pass through the network described by the globals ``W`` and ``depth``.

    Each of the first ``depth - 1`` weight matrices is followed by ``mySigmoid``;
    the last matrix ``W[-1]`` is applied without an activation.

    Returns
    -------
    (y, z_hist, x_hist)
        y      -- ``W[-1] @`` the last hidden activation.
        z_hist -- ``[0, z_1, ...]`` pre-activations. The leading 0 is a placeholder
                  that keeps both lists the same length, so
                  ``x_hist[k] == mySigmoid(z_hist[k])`` for k >= 1.
        x_hist -- ``[x, x_1, ...]`` the input followed by each hidden activation.
    """
    pre_acts, acts = [0], [x]
    a = x
    for layer in range(depth - 1):
        pre = W[layer] @ a
        a = mySigmoid(pre)
        pre_acts.append(pre)
        acts.append(a)
    return W[-1] @ a, pre_acts, acts

# Smoke test: evaluate the network at one standard-normal input and show its output.
x0 = np.random.normal(loc=0, scale=1, size=h[0])
print(f(x0)[0])

In [None]:
def backProp(x):
    """Jacobians of the network output with respect to the input and every weight matrix.

    Uses the globals ``h``, ``depth`` and ``W`` together with the forward pass ``f``.

    Returns
    -------
    (dfdx, dfdW_vec)
        dfdx     -- Jacobian of the output w.r.t. ``x``.
        dfdW_vec -- list of length ``depth``; entry ``k`` has shape
                    ``(h[-1], h[k+1]*h[k])`` and is the Jacobian of the output
                    w.r.t. W[k] flattened as a vector (built from
                    ``kron(x_k^T, I)``).
    """
    n_out = h[-1]
    jacobians = [np.zeros((n_out, h[k+1]*h[k])) for k in range(depth)]

    pre_acts, acts = f(x)[1:]

    # `sens` carries d(output)/d(pre-activation) backwards through the layers,
    # starting from the identity at the (linear) output layer.
    sens = np.eye(n_out)
    jacobians[-1] = sens @ np.kron(acts[-1].T, np.eye(n_out))
    for k in range(depth - 2, -1, -1):
        sens = sens @ W[k+1] @ dMySigmoid(pre_acts[k+1])
        jacobians[k] = sens @ np.kron(acts[k].T, np.eye(h[k+1]))

    return sens @ W[0], jacobians


# Shape check: show the shape of the Jacobian w.r.t. the first weight matrix.
weight_jacobians = backProp(x0)[1]
print(weight_jacobians[0].shape)
# Disabled comparison kept for reference; df_forward / df_backward are not
# defined in the cells visible here.
# print(np.linalg.norm(df_forward(x0)-df_backward(x0), np.inf))

In [None]:
def gradientChecking(x, f, df, eps=1e-6, tol=1e-4):
    """Compare an analytic Jacobian against a central finite-difference estimate.

    Parameters
    ----------
    x : 1-D numpy array
        Point at which the Jacobian is checked.
    f : callable
        ``f(x)[0]`` must be the 1-D function value at ``x``.
    df : callable
        ``df(x)[0]`` must be the analytic Jacobian, shape ``(len(f(x)[0]), len(x))``.
    eps : float, optional
        Half-width of the central difference step. Defaults to 1e-6.
    tol : float, optional
        Acceptance threshold on the error norm. Defaults to 1e-4.

    Returns
    -------
    bool-like
        True when ``np.linalg.norm(J_numeric - J_analytic, np.inf) < tol``.
    """
    n = x.shape[0]
    m = f(x)[0].shape[0]
    J = np.zeros((m, n))
    # Built once; column i is the i-th unit vector.
    basis = np.eye(n)
    for i in range(n):
        step = eps * basis[:, i]
        J[:, i] = (f(x + step)[0] - f(x - step)[0]) / (2 * eps)
    return np.linalg.norm(J - df(x)[0], np.inf) < tol


# Check backProp's input Jacobian against finite differences at a fresh random point.
x0 = np.random.normal(loc=0, scale=1, size=h[0])
print("Result of gradient checking:", gradientChecking(x0, f, backProp))

In [None]:
# Evaluate the network with its current weights at one random input and show the pair.
x0 = np.random.normal(loc=0, scale=1, size=h[0])
y0, _, _ = f(x0)

print(x0, y0)

In [None]:
# Re-create the network before training. Same layer widths as before, but each
# weight matrix is now divided by sqrt(number of rows), i.e. sqrt(h[k+1]).
h = [2, 10, 10, 2]
depth = len(h) - 1
W = [np.random.rand(n_out, n_in) / np.sqrt(n_out)
     for n_in, n_out in zip(h[:-1], h[1:])]

In [None]:
# Train the network to reproduce its own input (the target for sample x is x
# itself), which requires h[0] == h[-1].
# Every epoch draws one fresh batch uniformly from [-1, 1), accumulates the
# gradient of the summed squared error over that batch, and then takes a single
# batch-averaged gradient-descent step on every weight matrix.
alpha = 1e-1                    # learning rate
N_batch, N_epoch = 100, 1000    # samples per batch, number of epochs (one step each)
Loss = np.zeros(N_epoch)        # Loss[e] = summed squared error over the batch of epoch e


def _report_progress(epoch, X, Y):
    """Print the batch-mean loss of ``epoch`` and plot the outputs Y against the inputs X."""
    print('{}th epoch, Loss: {}'.format(epoch+1,Loss[epoch]/N_batch))
    plt.figure()
    plt.plot(X, Y, 'o')
    plt.plot([-1,1],[-1,1], 'k', linewidth=5)  # identity line: where a perfect fit would lie
    plt.grid()
    plt.axis('equal')
    plt.show()


for epoch in range(N_epoch):
    X = np.random.rand(N_batch, h[0])*2-1
    Y = np.zeros((N_batch, h[-1]))
    # One accumulator per layer for dL/dvec(W[d]); reset for every batch.
    dLdW_vec = [np.zeros((1,h[d]*h[d+1])) for d in range(depth)]
    for i in range(N_batch):
        x = X[i,:]
        y = f(x)[0]
        Y[i,:] = y  # outputs are recorded before this epoch's weight update
        Loss[epoch] += np.sum((y-x)**2)
        dLdf = 2.*(y-x).T  # gradient of sum((y - x)**2) with respect to y
        dfdW_vec = backProp(x)[1]
        for d in range(depth):
            # Chain rule: dL/dvec(W[d]) = dL/df @ df/dvec(W[d]).
            dLdW_vec[d] += dLdf@dfdW_vec[d]
    for d in range(depth):
        # Undo the flattening: reshape to W[d].T's shape, then transpose back.
        dLdWd = (dLdW_vec[d].reshape(W[d].T.shape)).T
        W[d] -= alpha/N_batch*dLdWd

    if not np.mod(epoch,100):
        _report_progress(epoch, X, Y)


# Final report for the last epoch and its batch.
_report_progress(epoch, X, Y)


# Batch-mean loss over all epochs.
plt.figure()
plt.plot(Loss/N_batch)
plt.grid()
plt.show()

In [None]:
# Demonstrates how a column-major vec() of a matrix is turned back into the matrix.
a = np.array([[1, 2, 3],
              [4, 5, 6]])
# b = vec(a): the columns of `a` stacked, stored as one row -> [[1, 4, 2, 5, 3, 6]]
b = a.flatten(order='F').reshape(1, -1)

print(a, b)
# Reshape to a.T's shape, then transpose: this recovers `a`.
restored = b.reshape(a.T.shape).T
print(restored)