# %%
import numpy as np
import matplotlib.pyplot as plt


def myRelu(z):
    """Element-wise ReLU: entries of ``z`` below 0 are replaced by 0."""
    x = np.clip(z, 0, np.inf)
    return x


def dMyRelu(z):
    """Jacobian of ``myRelu`` at the 1-D pre-activation vector ``z``.

    Returns a diagonal matrix with 1.0 where ``z > 0`` and 0.0 elsewhere.
    ``z`` is expected to be 1-D: ``np.diag`` would *extract* a diagonal from a
    2-D input instead of building one.
    """
    tmp = (z > 0).astype(float)
    J = np.diag(tmp)
    return J


def mySigmoid(z):
    """Element-wise logistic sigmoid ``1 / (1 + exp(-z))``.

    Only ``exp(-|z|)`` is ever evaluated, so large-magnitude inputs cannot
    overflow (the naive formula computes ``exp(-z)``, which overflows and warns
    for very negative ``z``).

    Returns an array shaped like ``z`` (a NumPy scalar for scalar input).
    """
    z = np.asarray(z, dtype=float)
    e = np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    x = np.where(z >= 0, 1. / (1. + e), e / (1. + e))
    return x[()]  # unwraps a 0-d result to a scalar, leaves n-d arrays alone


def dMySigmoid(z):
    """Jacobian of ``mySigmoid`` at the 1-D pre-activation vector ``z``.

    Returns ``diag(s * (1 - s))`` with ``s = mySigmoid(z)``.
    """
    x = mySigmoid(z)
    J = np.diag(x * (1. - x))
    return J


# %%
# Quick look at both activations and their Jacobians on a random vector.
z = np.random.normal(0, 1, 5)
x = myRelu(z)
J = dMyRelu(z)
print("z = {},\nx = {},\nJ = {}".format(z, x, J))

x = mySigmoid(z)
J = dMySigmoid(z)
print("z = {},\nx = {},\nJ = {}".format(z, x, J))

# %%
# Layer widths, input first; W[d] maps layer d to layer d+1 and has shape
# (h[d+1], h[d]).
h = [2, 10, 10, 2]
depth = len(h) - 1
W = [np.random.rand(h[d + 1], h[d]) for d in range(depth)]


def f(x, weights=None):
    """Forward pass: sigmoid after every layer except the last, which is linear.

    Parameters
    ----------
    x : 1-D array
        Network input.
    weights : list of 2-D arrays, optional
        Weight matrices, one per layer. Defaults to the notebook-level ``W``,
        looked up at call time so later re-assignments of ``W`` are honoured.

    Returns
    -------
    y : 1-D array
        Network output ``weights[-1] @ (last hidden activation)``.
    z_hist : list
        Pre-activations; ``z_hist[0]`` is a placeholder ``0`` so that
        ``z_hist[k]`` lines up with ``x_hist[k]``.
    x_hist : list
        ``x_hist[0]`` is the input, ``x_hist[k]`` the activation after layer k.
    """
    weights = W if weights is None else weights
    z_hist = [0]
    x_hist = [x]
    for Wd in weights[:-1]:
        z = Wd @ x
        x = mySigmoid(z)
        z_hist.append(z)
        x_hist.append(x)
    return weights[-1] @ x, z_hist, x_hist


x0 = np.random.normal(0, 1, h[0])
print(f(x0)[0])

# %%
def backProp(x, weights=None):
    """Jacobians of the network output w.r.t. the input and every weight matrix.

    Parameters
    ----------
    x : 1-D array
        Point at which the derivatives are evaluated.
    weights : list of 2-D arrays, optional
        Weight matrices; defaults to the notebook-level ``W`` (resolved at call
        time). All sizes are taken from these matrices, so the result stays
        consistent even if the global ``h`` is out of sync with the weights.

    Returns
    -------
    dfdx : 2-D array, shape (n_out, n_in)
        Jacobian of the output w.r.t. ``x``.
    dfdW_vec : list of 2-D arrays
        ``dfdW_vec[d]`` has shape (n_out, weights[d].size) and is the Jacobian
        w.r.t. the column-major vectorisation of ``weights[d]``: column
        ``j * rows + i`` corresponds to entry ``[i, j]``.
    """
    weights = W if weights is None else weights
    n_layers = len(weights)
    n_out = weights[-1].shape[0]

    _, z_hist, x_hist = f(x, weights)
    dfdW_vec = [None] * n_layers

    # delta is d(output)/d(pre-activation of the layer currently processed);
    # for the linear output layer that is the identity.
    delta = np.eye(n_out)
    # z = W x  =>  dz/dvec(W) = kron(x^T, I)
    dfdW_vec[-1] = delta @ np.kron(x_hist[-1].T, np.eye(n_out))
    for d in reversed(range(n_layers - 1)):
        delta = delta @ weights[d + 1] @ dMySigmoid(z_hist[d + 1])
        dfdW_vec[d] = delta @ np.kron(x_hist[d].T, np.eye(weights[d].shape[0]))

    dfdx = delta @ weights[0]
    return dfdx, dfdW_vec


print(backProp(x0)[1][0].shape)
# %%
def gradientChecking(x, f, df, eps=1e-6, tol=1e-4):
    """Compare an analytic input-Jacobian against central finite differences.

    Parameters
    ----------
    x : 1-D array
        Point at which the Jacobian is checked.
    f : callable
        ``f(x)[0]`` must be the 1-D function value.
    df : callable
        ``df(x)[0]`` must be the analytic Jacobian of ``f(x)[0]`` w.r.t. ``x``.
    eps : float, optional
        Half-width of the central difference step.
    tol : float, optional
        Largest accepted infinity-norm of the difference between both Jacobians.

    Returns
    -------
    bool
        True when the two Jacobians differ by less than ``tol``.
    """
    n = x.shape[0]
    d = f(x)[0].shape[0]
    J = np.zeros((d, n))
    basis = np.eye(n)  # built once; column i is the i-th unit vector
    for i in range(n):
        step = eps * basis[:, i]
        J[:, i] = (f(x + step)[0] - f(x - step)[0]) / (2 * eps)
    return np.linalg.norm(J - df(x)[0], np.inf) < tol


x0 = np.random.normal(0, 1, h[0])
print("Result of gradient checking:", gradientChecking(x0, f, backProp))

# %%
x0 = np.random.normal(0, 1, h[0])
y0, _, _ = f(x0)

print(x0, y0)

# %%
# Fresh weights for training, scaled down by sqrt(fan-out) of each layer.
h = [2, 10, 10, 2]
depth = len(h) - 1
W = [np.random.rand(h[d + 1], h[d]) / np.sqrt(h[d + 1]) for d in range(depth)]

# %%
def reportFit(epoch, X, Y, mean_loss):
    """Print the mean loss of ``epoch`` and scatter outputs ``Y`` against inputs ``X``.

    The thick black diagonal is the identity target the loss ``(y - x)**2``
    drives the network towards.
    """
    print('{}th epoch, Loss: {}'.format(epoch + 1, mean_loss))
    plt.figure()
    plt.plot(X, Y, 'o')
    plt.plot([-1, 1], [-1, 1], 'k', linewidth=5)
    plt.grid()
    plt.axis('equal')
    plt.xlabel('input x')
    plt.ylabel('network output f(x)')
    plt.show()


# %%
alpha = 1e-1                      # learning rate
N_batch, N_epoch = 100, 1000
Loss = np.zeros(N_epoch)          # summed squared error per epoch
for epoch in range(N_epoch):
    # Fresh batch of inputs drawn uniformly from [-1, 1)^h[0].
    X = np.random.rand(N_batch, h[0]) * 2 - 1
    Y = np.zeros((N_batch, h[-1]))
    # Gradient accumulators, one row vector per layer (column-major vec(W[d])).
    dLdW_vec = [np.zeros((1, h[d] * h[d + 1])) for d in range(depth)]
    for i in range(N_batch):
        x = X[i, :]
        y, _, _ = f(x)
        Y[i, :] = y
        Loss[epoch] += np.sum((y - x) ** 2)
        dLdf = 2. * (y - x).T
        _, dfdW_vec = backProp(x)
        for d in range(depth):
            dLdW_vec[d] += dLdf @ dfdW_vec[d]
    for d in range(depth):
        # Undo the column-major vectorisation: reshape to W[d].T, then transpose.
        dLdWd = (dLdW_vec[d].reshape(W[d].T.shape)).T
        W[d] -= alpha / N_batch * dLdWd

    if not np.mod(epoch, 100):
        reportFit(epoch, X, Y, Loss[epoch] / N_batch)


# State after the final epoch.
reportFit(epoch, X, Y, Loss[epoch] / N_batch)

plt.figure()
plt.plot(Loss / N_batch)
plt.grid()
plt.xlabel('epoch')
plt.ylabel('mean loss per sample')
plt.show()

# %%
# Sanity check of the reshape used in the weight update: b is the column-major
# vectorisation of a, and reshape-to-transposed-shape + transpose recovers a.
a = np.array([[1, 2, 3],
              [4, 5, 6]])
b = np.array([[1, 4, 2, 5, 3, 6]])  # b = vec(a)

print(a, b)
print(b.reshape(a.T.shape).T)
assert np.array_equal(b.reshape(a.T.shape).T, a)