diff --git a/lab09/lab_solved.ipynb b/lab09/lab_solved.ipynb new file mode 100644 index 0000000..e0a51e4 --- /dev/null +++ b/lab09/lab_solved.ipynb @@ -0,0 +1,1574 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2020-03-31T12:22:16.372625Z", + "start_time": "2020-03-31T12:22:16.345534Z" + } + }, + "source": [ + "EE-311\n", + "======\n", + "\n", + "Lab 9: Convolutional Neural Networks\n", + "------------------------------------\n", + "\n", + "created by Francois Marelli, Arnaud Pannatier and Roberto Boghetti on 05.04.2022\n", + "\n", + "In this lab, we will illustrate the basics of PyTorch, an open source framework for deep learning.\n", + "\n", + "We will also have a look at Convolutional Neural Networks (CNNs), and see why the convolution operation is very useful when working on images." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-04-28T07:21:13.343695Z", + "start_time": "2020-04-28T07:21:13.318856Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "\n", + "from tqdm.notebook import trange\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction to PyTorch and Tensors\n", + "\n", + "PyTorch is an open source machine learning framework that implements efficient algorithms for deep learning. It shares many similarities with NumPy, and in this section we will give a brief overview of the basic functionalities needed in this lab.\n", + "\n", + "Instead of arrays, PyTorch uses Tensors to store data. They are very similar to NumPy's ndarrays, with a few additional functionalities. 
Mainly, Tensors can be moved to GPU and implement automatic differentiation (autograd).\n", + "\n", + "### Creating Tensors\n", + "\n", + "Tensors can be created in similar ways to NumPy arrays:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor from list:\n", + " tensor([[1., 2., 3.],\n", + " [4., 5., 6.]])\n", + "\n", + "Zero tensor:\n", + " tensor([[0., 0., 0.],\n", + " [0., 0., 0.]])\n", + "\n", + "Empty tensor:\n", + " tensor([[0., 0., 0.],\n", + " [0., 0., 0.]])\n" + ] + } + ], + "source": [ + "# From a list\n", + "a_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])\n", + "\n", + "print('Tensor from list:\\n', a_tensor)\n", + "\n", + "# Full of zeros\n", + "zeros = torch.zeros(2, 3)\n", + "\n", + "print('\\nZero tensor:\\n', zeros)\n", + "\n", + "# Empty\n", + "empty = torch.empty(2, 3)\n", + "\n", + "print('\\nEmpty tensor:\\n', empty)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PyTorch and NumPy can be interfaced easily. It is possible to go from array to Tensor, and back." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor from array:\n", + " tensor([[1, 2, 3],\n", + " [4, 5, 6]])\n", + "\n", + "Array from Tensor:\n", + " [[1 2 3]\n", + " [4 5 6]]\n" + ] + } + ], + "source": [ + "# NumPy array\n", + "a_array = np.array([[1, 2, 3], [4, 5, 6]])\n", + "\n", + "# Create Tensor from array\n", + "a_tensor = torch.from_numpy(a_array)\n", + "\n", + "print('Tensor from array:\\n', a_tensor)\n", + "\n", + "# Convert Tensor to array\n", + "a_array = a_tensor.numpy()\n", + "print('\\nArray from Tensor:\\n', a_array)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tensor properties\n", + "\n", + "Just like arrays, Tensors are characterized by their `shape` and `type`.\n", + "\n", + "Note that the number of elements in a Tensor is given by `nelement`, which is a function." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape: torch.Size([2, 3])\n", + "Type: torch.int64\n", + "Elements: 6\n" + ] + } + ], + "source": [ + "print('Shape:', a_tensor.shape)\n", + "\n", + "print('Type:', a_tensor.dtype)\n", + "\n", + "print('Elements:', a_tensor.nelement())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Operation over Tensors\n", + "\n", + "Mathematical operations over Tensors are very similar to NumPy. Standard operators apply element-wise.\n", + "\n", + "Some operations require the type to be float (conversion is not automatic). Casting a Tensor to a different type is quite simple." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sum:\n", + " tensor([[3., 3., 3.],\n", + " [3., 3., 3.]])\n", + "\n", + "Product:\n", + " tensor([[2., 2., 2.],\n", + " [2., 2., 2.]])\n", + "\n", + "Dot:\n", + " tensor([[6., 6.],\n", + " [6., 6.]])\n" + ] + } + ], + "source": [ + "shape = 2, 3\n", + "\n", + "ones = torch.ones(shape)\n", + "twos = torch.full(shape, 2)\n", + "\n", + "sum_result = ones + twos\n", + "prod_result = ones * twos\n", + "\n", + "# Type conversion to float\n", + "ones = ones.float()\n", + "twos = twos.float()\n", + "\n", + "dot_result = ones @ twos.T\n", + "\n", + "print('Sum:\\n', sum_result)\n", + "print('\\nProduct:\\n', prod_result)\n", + "print('\\nDot:\\n', dot_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As with NumPy, it is easy to compute the sum or mean of a Tensor.\n", + "\n", + "If needed, they can be converted back to a scalar by using the `item` function." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sum: tensor(21.)\n", + "Mean: tensor(3.5000)\n", + "\n", + "Scalar mean: 3.5\n" + ] + } + ], + "source": [ + "a_tensor = a_tensor.float()\n", + "\n", + "print('Sum:', a_tensor.sum())\n", + "print('Mean:', a_tensor.mean())\n", + "\n", + "print('\\nScalar mean:', a_tensor.mean().item())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexing\n", + "\n", + "Indexing and slicing is exactly the same in PyTorch as in NumPy.\n", + "\n", + "Adding an empty dimension can be done using `None`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tensor:\n", + "tensor([[1., 2., 3.],\n", + " [4., 5., 6.]])\n", + "\n", + "Slice of the Tensor:\n", + "tensor([[1., 3.],\n", + " [4., 6.]])\n", + "\n", + "Added dimension: torch.Size([2, 1, 3])\n" + ] + } + ], + "source": [ + "a_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])\n", + "\n", + "print('Tensor:')\n", + "print(a_tensor)\n", + "\n", + "print('\\nSlice of the Tensor:')\n", + "print(a_tensor[:, 0:3:2])\n", + "\n", + "b_tensor = a_tensor[:, None, :]\n", + "print('\\nAdded dimension:', b_tensor.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Automatic differentiation with autograd\n", + "\n", + "One of the main features of PyTorch is autograd. This provides automatic differentiation, which is very useful for backpropagation!\n", + "\n", + "Instead of computing all the gradients manually like last week, we let autograd do the work.\n", + "\n", + "Can you manually check the following result?\n", + "\n", + "*$\\frac{\\partial y}{\\partial x} = 2\\ ( 3\\ ( x + 4 ) )\\ 3 = 18 x + 72 = 36 + 72 = 108$*" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x: 2.0\n", + "\n", + "Auto gradient dy/dx: 108.0\n" + ] + } + ], + "source": [ + "# Specify that x wants the gradient to be computed\n", + "x = torch.full((1,), 2,dtype=float, requires_grad=True)\n", + "\n", + "print('x:', x.item())\n", + "\n", + "y = ((x + 4) * 3) ** 2\n", + "\n", + "# Ask autograd to compute the gradients\n", + "y.backward()\n", + "\n", + "print('\\nAuto gradient dy/dx:', x.grad.item())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Coding an MLP with PyTorch\n", + "\n", + "Let us have a look at how we can implement and train a multi-layer perceptron in PyTorch in only 
a few steps. In this example, we will re-implement the multi-layer perceptron (MLP) seen in the previous lab for multi-class classification:\n", + "\n", + "$$x^{(0)} \\xrightarrow{W^{(1)},b^{(1)}} s^{(1)} \\xrightarrow{\\sigma} x^{(1)} \\xrightarrow{W^{(2)},b^{(2)}} s^{(2)}$$\n", + "$$x^{(0)} \\xrightarrow{Linear_1} s^{(1)} \\xrightarrow{tanh} x^{(1)} \\xrightarrow{Linear_2} s^{(2)}$$\n", + "\n", + "As you can see, there is one small difference here: we do not have a sigmoid at the output anymore! This is because we will use a different loss that is better suited for classification tasks: cross entropy. Since we do not have to compute gradients manually, we are not limited to simple losses like MSE.\n", + "\n", + "PyTorch provides a collection of classes and functions, contained in `torch.nn` ([doc](https://pytorch.org/docs/stable/nn.html)) that allows us to write our code in an efficient and elegant way. We start by creating an MLP class that inherits from `torch.nn.Module` ([doc](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)). The class is composed of two parts: the `__init__()` function, which creates the layers, and the `forward` method, which applies them to the input.\n", + "\n", + "The standard way of building models in PyTorch is to define a class that inherits from `torch.nn.Module` ([doc](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)), which is the base class for constructing all neural networks. \n", + "\n", + "The main elements of our computational graph must be created in the `__init__()` function of our class. In our case, these include the two [Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) layers and the [hyperbolic tangent](https://pytorch.org/docs/stable/generated/torch.nn.Tanh.html) activation function. When instantiating the class `Linear` we need to specify the input and output dimensions. 
For the first layer the input dimension is the number of features in our data (2), while the output dimension is an arbitrary number of hidden neurons (50); for the second layer the input dimension is equal to the output of the previous layer, while the output dimension is the number of possible classes (3).\n", + "\n", + "The forward pass is then defined with the method `forward`, which defines the order of computations on the input data `x`.\n", + "\n", + "**Reminder about classes:**\n", + "* the `self` keyword represents the current object when used in a class\n", + "* the `__init__` function is called when an object is created\n", + "* the `super().__init__()` calls the `__init__` function of the parent class, which is `nn.Module` in this case\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "class MLP(nn.Module):\n", + "    \"\"\"Two-layer perceptron: Linear -> Tanh -> Linear, returning raw class scores.\"\"\"\n", + "\n", + "    def __init__(self, in_features=2, hidden_features=50, out_features=3):\n", + "        \"\"\"Create the layers; the defaults give the 2 -> 50 -> 3 network used in this lab.\"\"\"\n", + "        super().__init__()\n", + "\n", + "        # Create the two linear layers\n", + "        self.linear1 = nn.Linear(in_features, hidden_features)\n", + "        self.linear2 = nn.Linear(hidden_features, out_features)\n", + "\n", + "        # And the activation\n", + "        self.activation1 = nn.Tanh()\n", + "\n", + "    def forward(self, x):\n", + "        \"\"\"Apply Linear1 -> tanh -> Linear2 to `x` and return the result.\"\"\"\n", + "        # No activation after the last layer: the raw scores are returned as they are\n", + "        x = self.linear1(x)\n", + "        x = self.activation1(x)\n", + "        x = self.linear2(x)\n", + "\n", + "        return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the model architecture is defined, we can instantiate the class and print the properties of the obtained object." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MLP(\n", + " (linear1): Linear(in_features=2, out_features=50, bias=True)\n", + " (linear2): Linear(in_features=50, out_features=3, bias=True)\n", + " (activation1): Tanh()\n", + ")\n" + ] + } + ], + "source": [ + "# Instantiate an object of class MLP\n", + "model = MLP()\n", + "\n", + "print(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will train this MLP on the same dataset as last week." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'data.npz'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata.npz\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 3\u001b[0m circle_X \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcircle_X\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 4\u001b[0m circle_y \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcircle_y\u001b[39m\u001b[38;5;124m'\u001b[39m]\n", + "File \u001b[0;32m/usr/local/Caskroom/mambaforge/base/envs/pyee311/lib/python3.11/site-packages/numpy/lib/npyio.py:405\u001b[0m, in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding, max_header_size)\u001b[0m\n\u001b[1;32m 403\u001b[0m own_fid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 405\u001b[0m 
fid \u001b[38;5;241m=\u001b[39m stack\u001b[38;5;241m.\u001b[39menter_context(\u001b[38;5;28mopen\u001b[39m(os_fspath(file), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 406\u001b[0m own_fid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 408\u001b[0m \u001b[38;5;66;03m# Code to distinguish from NumPy binary files and pickles.\u001b[39;00m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'data.npz'" + ] + } + ], + "source": [ + "data = np.load('data.npz')\n", + "\n", + "circle_X = data['circle_X']\n", + "circle_y = data['circle_y']\n", + "\n", + "plt.figure(figsize=(7, 7))\n", + "plt.scatter(circle_X[:,0], circle_X[:,1], c=circle_y)\n", + "plt.axis('square')\n", + "plt.grid()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As always, we start by preprocessing the dataset. We split it in train and test.\n", + "\n", + "Note that we do not need one-hot encoding like last week, as we use cross entropy instead of MSE.\n", + "\n", + "To be rigorous, we can only use the train set to compute normalization parameters.\n", + "\n", + "For use with PyTorch, we then convert the data to float Tensors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train set shape: torch.Size([800, 2])\n" + ] + } + ], + "source": [ + "# Split in train and test sets\n", + "train_input, test_input, train_target, test_target = train_test_split(circle_X, circle_y, test_size=0.2, random_state=0)\n", + "\n", + "# Normalize input \n", + "mu, std = train_input.mean(0), train_input.std(0)\n", + "train_input = (train_input - mu) / std\n", + "test_input = (test_input - mu) / std\n", + "\n", + "# Convert the sets to Tensors\n", + "train_input = torch.from_numpy(train_input).float()\n", + "test_input = torch.from_numpy(test_input).float()\n", + "\n", + "train_target = torch.from_numpy(train_target)\n", + "test_target = torch.from_numpy(test_target)\n", + "\n", + "print('Train set shape:', train_input.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training the model\n", + "\n", + "Before training the model, we must still instantiate an optimizer and a loss function.\n", + "\n", + "The optimizer takes care of updating the model parameters, using the gradients that autograd computes by backpropagation. In this case we will use a simple Stochastic Gradient Descent (SGD).\n", + "\n", + "When creating the optimizer, we tell it that it must update all the parameters of our model, and we set the learning rate for gradient descent." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = optim.SGD(model.parameters(), lr=1e-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the loss function, we use cross entropy as it is very efficient for classification problems. We do not need to specify any additional parameters here." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "criterion = nn.CrossEntropyLoss()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we define the size of the training batches. Each data batch contains a limited number of samples instead of the full dataset.\n", + "\n", + "Using batches for training allows us to reduce the memory usage and speed up the computations.\n", + "\n", + "In this example, we define the batch to contain 50 samples.\n", + "\n", + "We also create an empty list that we will use to save and plot the training loss." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 50\n", + "\n", + "train_loss = []" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is finally time to implement our training loop!\n", + "\n", + "The outer loop iterates over epochs: each time we go over the full training dataset.\n", + "\n", + "The inner loop iterates over the batches in the dataset.\n", + "\n", + "**The core of the loops is the following process:**\n", + "\n", + "1. We reset the gradients of the optimizer as we don't want to accumulate them over time\n", + "\n", + "2. We compute predictions using the forward pass on the batch\n", + "\n", + "3. We compute the loss between the predictions and the targets\n", + "\n", + "4. We compute the gradient of the loss with respect to the model parameters (backpropagation)\n", + "\n", + "5. We update the parameters of our model\n", + "\n", + "At the end of each epoch, we save the training loss to generate the final graph. It shows if the network is learning correctly.\n", + "\n", + "Note that if you run the following cell multiple times without creating a new model (and optimizer), the training will continue from where it stopped." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7a5f653ace5d43b4a51d9837a8c46a79", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Epochs: 0%| | 0/1000 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "torch.manual_seed(42)\n", + "\n", + "for epoch in trange(1000, desc='Epochs'):\n", + " epoch_loss = 0\n", + "\n", + " for batch_idx in range(train_input.shape[0] // batch_size):\n", + "\n", + " # Create the batch\n", + " X_batch = train_input[batch_idx * batch_size : (batch_idx+1) * batch_size]\n", + " y_batch = train_target[batch_idx * batch_size : (batch_idx+1) * batch_size]\n", + " \n", + " # 1. Reset the gradients\n", + " optimizer.zero_grad()\n", + "\n", + " # 2. Compute the predictions\n", + " predict = model(X_batch)\n", + "\n", + " # 3. Compute the loss\n", + " loss = criterion(predict, y_batch)\n", + " \n", + " # 4. Compute the loss gradient\n", + " loss.backward()\n", + " \n", + " # 5. Update the model parameters\n", + " optimizer.step()\n", + "\n", + " # Keep track of the total loss\n", + " epoch_loss += loss.item() * X_batch.shape[0]\n", + "\n", + " # Save the total epoch loss, averaged per sample\n", + " train_loss.append(epoch_loss / train_input.shape[0])\n", + "\n", + "\n", + "plt.figure(figsize=(10, 7))\n", + "plt.plot(train_loss)\n", + "plt.title('Training loss')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have trained the model, we want to measure its performance.\n", + "\n", + "Because of the architecture of the model and the loss function, our network outputs 3 values for each sample. 
We must keep only the class with the highest value for our prediction.\n", + "\n", + "Compute the accuracy of the model over the training and test set. Do they match last week's results?" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training accuracy: 88.50%\n", + "Test accuracy: 85.50%\n" + ] + } + ], + "source": [ + "# Code here\n", + "\n", + "predict = model(train_input)\n", + "predict = predict.argmax(1)\n", + "\n", + "accuracy = ((predict == train_target) * 1.0).mean()\n", + "\n", + "predict_t = model(test_input)\n", + "predict_t = predict_t.argmax(1)\n", + "\n", + "accuracy_t = ((predict_t == test_target) * 1.0).mean()\n", + "\n", + "print('Training accuracy: {:.02f}%'.format(accuracy * 100))\n", + "print('Test accuracy: {:.02f}%'.format(accuracy_t * 100))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convolutional Neural Network - Image Classification\n", + "\n", + "In this section, we are going to investigate a binary classification problem using a CNN.\n", + "\n", + "We have an annotated dataset of images with two classes, of which some examples are shown below." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-04-30T08:05:29.567476Z", + "start_time": "2020-04-30T08:05:28.367329Z" + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "data_X = data['data_X']\n", + "data_y = data['data_y']\n", + "\n", + "plt.figure(figsize=(12, 8))\n", + "\n", + "for i in range(2):\n", + " class_X = data_X[data_y == i]\n", + " for j in range(3):\n", + " plt.subplot(2, 3, 3 * i + j + 1)\n", + " plt.title('Class: {}'.format(i))\n", + " plt.imshow(class_X[j])\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model architecture\n", + "\n", + "To tackle this binary classification problem, we want to design a simple CNN with a single convolutional layer followed by a fully connected layer.\n", + "\n", + "More precisely, the architecture is the following:\n", + "\n", + "X $\\rightarrow$ [Conv2d](https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html) $\\rightarrow$ [ReLU](https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html) $\\rightarrow$ [MaxPool2d](https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html) $\\rightarrow$ [Flatten](https://pytorch.org/docs/stable/generated/torch.nn.Flatten.html) $\\rightarrow$ [Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) $\\rightarrow$ [Sigmoid](https://pytorch.org/docs/stable/generated/torch.nn.Sigmoid.html) $\\rightarrow$ y\n", + "\n", + "For the convolutional layer, we choose a kernel of size 3 with a single output channel.\n", + "\n", + "As a reminder, the pooling layer will reduce the dimension of the array by keeping only the maximum value by zones. We use a kernel of size 2 for the pooling.\n", + "\n", + "The flatten layer is needed because the output of the convolutional layer (and the pooling layer) is a 2D image, but fully connected (linear) layers only accept vectors as inputs.\n", + "\n", + "You need to compute the right input size for the linear layer. 
For this, you need to first check the dimension of the input images in the cell below.\n", + "\n", + "Use that information to compute the desired input size for the linear output layer of your model. \n", + "\n", + "When performing a convolution with valid entries only (no padding of the input) on a $W \\times H$ image with a kernel of size $w \\times h$, the output will be of size $(W - w + 1) \\times (H - h + 1)$.\n", + "\n", + "The data then goes into a pooling layer, which divides each spatial dimension by N (rounding down), with N being the size of the pooling kernel." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input shape: (6, 6)\n", + "Convolution output size: 4\n", + "Pooling output size: 2\n", + "Linear input size: 4\n" + ] + } + ], + "source": [ + "# Code here\n", + "\n", + "shape = data_X.shape[-2:]\n", + "\n", + "print('Input shape:', shape)\n", + "\n", + "out_conv = shape[0] - 3 + 1\n", + "\n", + "print('Convolution output size:', out_conv)\n", + "\n", + "out_pool = out_conv // 2\n", + "\n", + "print('Pooling output size:', out_pool)\n", + "\n", + "in_linear = out_pool ** 2\n", + "\n", + "print('Linear input size:', in_linear)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now implement the model in the following cell. As you can see, the forward function uses `squeeze` before returning the result. This is to remove any unnecessary empty dimensions in the output.\n", + "\n", + "Hint: Use the following classes for your implementation\n", + "- `nn.Conv2d`\n", + "- `nn.ReLU`\n", + "- `nn.MaxPool2d`\n", + "- `nn.Flatten`\n", + "- `nn.Linear`\n", + "- `nn.Sigmoid`\n", + "\n", + "Take a look back at the [torch.nn documentation](https://pytorch.org/docs/stable/nn.html) to figure out which parameters to use." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + "    \"\"\"Small CNN for binary classification: Conv2d -> ReLU -> MaxPool2d -> Flatten -> Linear -> Sigmoid.\"\"\"\n", + "\n", + "    def __init__(self, linear_in_features=4):\n", + "        \"\"\"Create the layers; `linear_in_features` is the flattened size after pooling (4 for 6x6 images).\"\"\"\n", + "        super().__init__()\n", + "\n", + "        ########################################\n", + "        # Code here\n", + "\n", + "        # 1 input channel, 1 output channel, kernel of size 3, no padding\n", + "        self.conv = nn.Conv2d(1, 1, 3)\n", + "        self.relu = nn.ReLU()\n", + "        self.maxpool = nn.MaxPool2d(2)\n", + "        self.flatten = nn.Flatten()\n", + "        self.linear = nn.Linear(linear_in_features, 1)\n", + "        self.sigmoid = nn.Sigmoid()\n", + "\n", + "        ########################################\n", + "\n", + "    def forward(self, x):\n", + "        \"\"\"Map a batch of shape (batch, 1, H, W) to one probability per sample, shape (batch,).\"\"\"\n", + "\n", + "        ########################################\n", + "        # Code here\n", + "\n", + "        x = self.conv(x)\n", + "        x = self.relu(x)\n", + "        x = self.maxpool(x)\n", + "        x = self.flatten(x)\n", + "        x = self.linear(x)\n", + "        x = self.sigmoid(x)\n", + "\n", + "        ########################################\n", + "\n", + "        # Drop only the trailing size-1 feature axis: a bare squeeze() would also remove\n", + "        # the batch axis when the batch holds a single sample, breaking the loss computation.\n", + "        return x.squeeze(-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Processing the data\n", + "\n", + "Start with the preprocessing of the data.\n", + "\n", + "Normalize the datasets (remember, use only the training information), then convert them to float Tensors.\n", + "\n", + "The targets must also be converted to float Tensors for the cross entropy loss." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.2, random_state=42)\n", + "\n", + "\n", + "################################################\n", + "# Code here\n", + "\n", + "x_mean = X_train.mean()\n", + "x_std = X_train.std()\n", + "\n", + "X_train = (X_train - x_mean) / x_std\n", + "X_test = (X_test - x_mean) / x_std\n", + "\n", + "X_train = torch.from_numpy(X_train).float()\n", + "X_test = torch.from_numpy(X_test).float()\n", + "\n", + "y_train = torch.from_numpy(y_train).float()\n", + "y_test = torch.from_numpy(y_test).float()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Setting up the training\n", + "\n", + "We must now create the different objects required to train the model.\n", + "\n", + "Since we are now dealing with a binary classification problem, we will use the binary cross entropy loss implemented with the `nn.BCELoss` class.\n", + "We will use the stochastic gradient descent optimizer as in the first part of the lab.\n", + "\n", + "Complete the following cell, with an imposed learning rate of 0.1 and a batch size of 20.\n", + "\n", + "The `training_loss` variable should be an empty list to save the training loss later.\n", + "\n", + "Since the model and optimizer are created in the following cell and not in the training loop, running the training cell again continues the training from where it stopped. Run the following cell again first to restart the training from scratch." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Code here\n", + "\n", + "model = Net()\n", + "\n", + "criterion = nn.BCELoss()\n", + "\n", + "optimizer = optim.SGD(model.parameters(), lr=1e-1)\n", + "\n", + "batch_size = 20\n", + "\n", + "training_loss = []" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training the model\n", + "\n", + "You can now fill in the training loop. 
Remember the steps we listed above!\n", + "\n", + "Carefully check the documentation of Conv2d for the expected shape of the input data.\n", + "\n", + "At the end of the training, you should see a plot showing the evolution of the loss." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "432d211e8e5d439589c25ef8c9c8e5a2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Epochs: 0%| | 0/10 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "torch.manual_seed(42)\n", + "\n", + "for epoch in trange(10, desc='Epochs'):\n", + " epoch_loss = 0\n", + " for batch_idx in trange(X_train.shape[0] // batch_size, desc='Batches', leave=None):\n", + " \n", + " ####################################################\n", + " # Code here\n", + "\n", + " X_batch = X_train[batch_idx * batch_size : (batch_idx+1) * batch_size]\n", + " y_batch = y_train[batch_idx * batch_size : (batch_idx+1) * batch_size]\n", + "\n", + " X_batch = X_batch[:, None, ...]\n", + "\n", + " optimizer.zero_grad()\n", + "\n", + " predict = model(X_batch)\n", + "\n", + " loss = criterion(predict, y_batch)\n", + "\n", + " loss.backward()\n", + " \n", + " optimizer.step()\n", + "\n", + " epoch_loss += loss.item() * X_batch.shape[0]\n", + "\n", + " training_loss.append(epoch_loss / X_train.shape[0])\n", + "\n", + "\n", + "plt.figure(figsize=(10, 7))\n", + "plt.plot(training_loss, label='Training')\n", + "plt.title('Losses')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Computing the accuracy\n", + "\n", + "How well does the model perform? Compute its accuracy over the training and test sets.\n", + "\n", + "You can run the training cell again to further improve the performance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training accuracy: 73.75%\n", + "Test accuracy: 59.00%\n" + ] + } + ], + "source": [ + "# Code here\n", + "\n", + "# Evaluation only: disable autograd so that no computation graph is built\n", + "with torch.no_grad():\n", + "    # Add the channel dimension expected by Conv2d, then threshold the probabilities at 0.5\n", + "    predict = model(X_train[:, None, ...]) > 0.5\n", + "    predict_t = model(X_test[:, None, ...]) > 0.5\n", + "\n", + "# Accuracy = fraction of samples whose predicted class matches the target\n", + "accuracy = (predict == y_train).float().mean().item()\n", + "accuracy_t = (predict_t == y_test).float().mean().item()\n", + "\n", + "print('Training accuracy: {:.02f}%'.format(accuracy * 100))\n", + "print('Test accuracy: {:.02f}%'.format(accuracy_t * 100))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitoring the test loss\n", + "\n", + "It is always important to keep track of the test loss as well, particularly when the performance on test and training sets is different.\n", + "\n", + "Copy the two cells under *Setting up the training* and *Training the model*, and modify them to also compute and plot the loss on the test set. This should be computed at the end of every epoch.\n", + "\n", + "You do not need to use batches for computing the test loss." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# Code here\n", + "\n", + "model = Net()\n", + "\n", + "criterion = nn.BCELoss()\n", + "\n", + "optimizer = optim.SGD(model.parameters(), lr=1e-1)\n", + "\n", + "batch_size = 20\n", + "\n", + "training_loss = []\n", + "test_loss = []" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "df380188a4644f6fb00c8397d4e99060", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Epochs: 0%| | 0/10 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "torch.manual_seed(42)\n", + "\n", + "for epoch in trange(10, desc='Epochs'):\n", + " epoch_loss = 0\n", + " for batch_idx in trange(X_train.shape[0] // batch_size, desc='Batches', leave=None):\n", + " \n", + " ####################################################\n", + " # Code here\n", + "\n", + " X_batch = X_train[batch_idx * batch_size : (batch_idx+1) * batch_size]\n", + " y_batch = y_train[batch_idx * batch_size : (batch_idx+1) * batch_size]\n", + "\n", + " X_batch = X_batch[:, None, ...]\n", + "\n", + " optimizer.zero_grad()\n", + "\n", + " predict = model(X_batch)\n", + "\n", + " loss = criterion(predict, y_batch)\n", + "\n", + " loss.backward()\n", + " \n", + " optimizer.step()\n", + "\n", + " epoch_loss += loss.item() * X_batch.shape[0]\n", + "\n", + " training_loss.append(epoch_loss / X_train.shape[0])\n", + "\n", + " # Compute the test loss\n", + " predict_t = model(X_test[:, None, ...])\n", + "\n", + " loss_t = criterion(predict_t, y_test)\n", + "\n", + " test_loss.append(loss_t.item())\n", + "\n", + "\n", + "plt.figure(figsize=(10, 7))\n", + "plt.plot(training_loss, label='Training')\n", + "plt.plot(test_loss, label='Test')\n", + "plt.title('Losses')\n", + "plt.xlabel('Epoch')\n", + 
"plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}