{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "def myRelu(z):\n",
    "    x = np.clip(z,0,np.inf)\n",
    "    return x\n",
    "def dMyRelu(z):\n",
    "    tmp = (z>0).astype(float)\n",
    "    J = np.diag(tmp)\n",
    "    return J\n",
    "\n",
    "def mySigmoid(z):\n",
    "    x = 1./(np.exp(-z)+1.)\n",
    "    return x\n",
    "def dMySigmoid(z):\n",
    "    x = mySigmoid(z)\n",
    "    J = np.diag(x*(1.-x))\n",
    "    return J"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "z = np.random.normal(0,1,5)\n",
    "x = myRelu(z)\n",
    "J = dMyRelu(z)\n",
    "print(\"z = {},\\nx = {},\\nJ = {}\".format(z, x, J))\n",
    "\n",
    "x = mySigmoid(z)\n",
    "J = dMySigmoid(z)\n",
    "print(\"z = {},\\nx = {},\\nJ = {}\".format(z, x, J))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "h = [2, 10, 10, 2]\n",
    "depth = len(h)-1\n",
    "W = [np.random.rand(h[d+1],h[d]) for d in range(depth)]\n",
    "\n",
    "def f(x):\n",
    "    z_hist = [0]; x_hist = [x]\n",
    "    for d in range(depth-1):\n",
    "        z = W[d]@x\n",
    "        x = mySigmoid(z)\n",
    "        z_hist.append(z)\n",
    "        x_hist.append(x)\n",
    "    return W[-1]@x, z_hist, x_hist\n",
    "\n",
    "x0 = np.random.normal(0,1,h[0])\n",
    "print(f(x0)[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def backProp(x):\n",
    "    dfdW_vec = [np.zeros((h[-1],h[d+1]*h[d])) for d in range(depth)]\n",
    "    \n",
    "    _, z_hist, x_hist = f(x)\n",
    "    \n",
    "    delta = np.eye(h[-1])\n",
    "    dfdW_vec[-1] = delta@np.kron(x_hist[-1].T, np.eye(h[-1]))\n",
    "    for d in reversed(range(depth-1)):\n",
    "        delta = delta@W[d+1]@dMySigmoid(z_hist[d+1])\n",
    "        dfdW_vec[d] = delta@np.kron(x_hist[d].T, np.eye(h[d+1]))\n",
    "        \n",
    "    dfdx = delta@W[0]\n",
    "    return dfdx, dfdW_vec\n",
    "\n",
    "\n",
    "print(backProp(x0)[1][0].shape)\n",
    "# print(np.linalg.norm(df_forward(x0)-df_backward(x0), np.inf))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gradientChecking(x, f, df):\n",
    "    n = x.shape[0]\n",
    "    d = f(x)[0].shape[0]\n",
    "    J, eps = np.zeros((d,n)), 1e-6\n",
    "    for i in range(n):\n",
    "        ei = np.eye(n)[:,i]\n",
    "        J[:,i] = (f(x+eps*ei)[0]-f(x-eps*ei)[0])/(2*eps)\n",
    "    return np.linalg.norm(J-df(x)[0], np.inf) < 1e-4\n",
    "\n",
    "\n",
    "x0 = np.random.normal(0,1,h[0])\n",
    "print(\"Result of gradient checking:\", gradientChecking(x0, f, backProp))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x0 = np.random.normal(0,1,h[0])\n",
    "y0, _, _ = f(x0)\n",
    "\n",
    "print(x0, y0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "h = [2, 10, 10, 2]\n",
    "depth = len(h)-1\n",
    "W = [np.random.rand(h[d+1],h[d])/np.sqrt(h[d+1]) for d in range(depth)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "alpha = 1e-1\n",
    "N_batch, N_epoch = 100, 1000\n",
    "Loss = np.zeros(N_epoch)\n",
    "for epoch in range(N_epoch):\n",
    "    X = np.random.rand(N_batch, h[0])*2-1\n",
    "    Y = np.zeros((N_batch, h[-1]))\n",
    "    for i in range(N_batch):\n",
    "        x = X[i,:]\n",
    "        y, _, _ = f(x)\n",
    "        Y[i,:] = y\n",
    "        Loss[epoch] += np.sum((y-x)**2)\n",
    "        dLdf = 2.*(y-x).T\n",
    "        _, dfdW_vec = backProp(x)\n",
    "        if i == 0: dLdW_vec = [np.zeros((1,h[d]*h[d+1])) for d in range(depth)]\n",
    "        for d in range(depth):\n",
    "            dLdW_vec[d] += dLdf@dfdW_vec[d]\n",
    "    for d in range(depth): \n",
    "        dLdWd = (dLdW_vec[d].reshape(W[d].T.shape)).T\n",
    "        W[d] -= alpha/N_batch*dLdWd\n",
    "        \n",
    "    if not np.mod(epoch,100):\n",
    "        print('{}th epoch, Loss: {}'.format(epoch+1,Loss[epoch]/N_batch))\n",
    "        plt.figure()\n",
    "        plt.plot(X, Y, 'o')\n",
    "        plt.plot([-1,1],[-1,1], 'k', linewidth=5)\n",
    "        plt.grid()\n",
    "        plt.axis('equal')\n",
    "        plt.show()\n",
    "\n",
    "        \n",
    "print('{}th epoch, Loss: {}'.format(epoch+1,Loss[epoch]/N_batch))\n",
    "plt.figure()\n",
    "plt.plot(X, Y, 'o')\n",
    "plt.plot([-1,1],[-1,1], 'k', linewidth=5)\n",
    "plt.grid()\n",
    "plt.axis('equal')\n",
    "plt.show()\n",
    "\n",
    "        \n",
    "plt.figure()\n",
    "plt.plot(Loss/N_batch)\n",
    "plt.grid()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.array([[1,2,3],\n",
    "              [4,5,6]])\n",
    "b = np.array([[1,4,2,5,3,6]]) # b = vec(a)\n",
    "\n",
    "print(a,b)\n",
    "print(b.reshape(a.T.shape).T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}