diff --git a/bel_NN_dynamic.ipynb b/bel_NN_dynamic.ipynb
index 6a2066a..ba9358c 100644
--- a/bel_NN_dynamic.ipynb
+++ b/bel_NN_dynamic.ipynb
@@ -2,24 +2,47 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
+    "import os\n",
+    "\n",
     "import numpy as np\n",
     "import pandas as pd\n",
+    "# CuPy is imported in the next cell, with a NumPy fallback bound to xp\n",
+    "\n",
     "from sklearn.model_selection import train_test_split\n",
     "from itertools import product"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    import cupy as cp\n",
+    "    if cp.cuda.is_available():\n",
+    "        print(\"GPU is available. Using CuPy for GPU acceleration.\")\n",
+    "        xp = cp\n",
+    "    else:\n",
+    "        print(\"GPU is not available. Falling back to NumPy on CPU.\")\n",
+    "        xp = np\n",
+    "except ImportError:\n",
+    "    print(\"CuPy not found. Using NumPy on CPU.\")\n",
+    "    xp = np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "data = pd.read_csv('data/bel_data_test.csv')\n",
-    "data = np.array(data)\n",
+    "data = xp.array(data)\n",
     "\n",
     "# Split data\n",
     "X = data[:, 1:].T\n",
@@ -54,7 +77,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, Y_train = xp.array(X_train), xp.array(Y_train)\n",
+    "X_val, Y_val = xp.array(X_val), xp.array(Y_val)\n",
+    "X_test, Y_test = xp.array(X_test), xp.array(Y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,23 +97,23 @@
     "    L = len(layer_dims)\n",
     "    \n",
     "    for l in range(1, L):\n",
-    "        params[f'W{l}'] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(2. / layer_dims[l-1])\n",
-    "        params[f'b{l}'] = np.zeros((layer_dims[l], 1))\n",
+    "        params[f'W{l}'] = xp.random.randn(layer_dims[l], layer_dims[l-1]) * xp.sqrt(2. / layer_dims[l-1])\n",
+    "        params[f'b{l}'] = xp.zeros((layer_dims[l], 1))\n",
     "    \n",
     "    return params"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
     "def ReLU(Z):\n",
-    "    return np.maximum(Z, 0)\n",
+    "    return xp.maximum(Z, 0)\n",
     "\n",
     "def softmax(Z):\n",
-    "    A = np.exp(Z) / sum(np.exp(Z))\n",
+    "    A = xp.exp(Z) / xp.sum(xp.exp(Z), axis=0, keepdims=True)\n",
     "    return A\n",
     "\n",
     "def forward_prop(X, params):\n",
@@ -91,13 +125,13 @@
     "        A_prev = A\n",
     "        W = params[f'W{l}']\n",
     "        b = params[f'b{l}']\n",
-    "        Z = np.dot(W, A_prev) + b\n",
+    "        Z = xp.dot(W, A_prev) + b\n",
     "        A = ReLU(Z)\n",
     "        caches.append((A_prev, W, b, Z))\n",
     "\n",
     "    WL = params[f'W{L}']\n",
     "    bL = params[f'b{L}']\n",
-    "    ZL = np.dot(WL, A) + bL\n",
+    "    ZL = xp.dot(WL, A) + bL\n",
     "    AL = softmax(ZL)\n",
     "    caches.append((A, WL, bL, ZL))\n",
     "\n",
@@ -107,10 +141,14 @@
     "    return Z > 0\n",
     "\n",
     "def one_hot(Y):\n",
-    "    one_hot_Y = np.zeros((Y.size, Y.max() + 1))\n",
-    "    one_hot_Y[np.arange(Y.size), Y] = 1\n",
-    "    one_hot_Y = one_hot_Y.T\n",
-    "    return one_hot_Y\n",
+    "    # Build a one-hot matrix of shape (num_classes, m) so the output-layer\n",
+    "    # gradient can be formed directly as AL - Y in backward_prop.\n",
+    "    # Labels may arrive from the CSV as floats, so cast them to int first;\n",
+    "    # int(xp.max(Y)) pulls the class count back to the host to build the shape.\n",
+    "    Y = Y.astype(int)\n",
+    "    one_hot_Y = xp.zeros((Y.size, int(xp.max(Y)) + 1))\n",
+    "    one_hot_Y[xp.arange(Y.size), Y] = 1\n",
+    "    return one_hot_Y.T\n",
     "\n",
     "def backward_prop(AL, Y, caches):\n",
     "    grads = {}\n",
@@ -120,17 +158,17 @@
     "\n",
     "    dAL = AL - Y\n",
     "    current_cache = caches[L-1]\n",
-    "    grads[f\"dW{L}\"] = 1 / m * np.dot(dAL, current_cache[0].T)\n",
-    "    grads[f\"db{L}\"] = 1 / m * np.sum(dAL, axis=1, keepdims=True)\n",
-    "    dA_prev = np.dot(current_cache[1].T, dAL)\n",
+    "    grads[f\"dW{L}\"] = 1 / m * xp.dot(dAL, current_cache[0].T)\n",
+    "    grads[f\"db{L}\"] = 1 / m * xp.sum(dAL, axis=1, keepdims=True)\n",
+    "    dA_prev = xp.dot(current_cache[1].T, dAL)\n",
     "\n",
     "    for l in reversed(range(L-1)):\n",
     "        current_cache = caches[l]\n",
     "        dZ = dA_prev * ReLU_deriv(current_cache[3])\n",
-    "        grads[f\"dW{l+1}\"] = 1 / m * np.dot(dZ, current_cache[0].T)\n",
-    "        grads[f\"db{l+1}\"] = 1 / m * np.sum(dZ, axis=1, keepdims=True)\n",
+    "        grads[f\"dW{l+1}\"] = 1 / m * xp.dot(dZ, current_cache[0].T)\n",
+    "        grads[f\"db{l+1}\"] = 1 / m * xp.sum(dZ, axis=1, keepdims=True)\n",
     "        if l > 0:\n",
-    "            dA_prev = np.dot(current_cache[1].T, dZ)\n",
+    "            dA_prev = xp.dot(current_cache[1].T, dZ)\n",
     "\n",
     "    return grads\n",
     "\n",
@@ -144,15 +182,15 @@
     "    return params\n",
     "\n",
     "def get_predictions(AL):\n",
-    "    return np.argmax(AL, axis=0)\n",
+    "    return xp.argmax(AL, axis=0)\n",
     "\n",
     "def get_accuracy(predictions, Y):\n",
-    "    return np.sum(predictions == Y) / Y.size"
+    "    return xp.sum(predictions == Y) / Y.size"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -191,7 +229,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -210,23 +248,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# def predict(X, parameters):\n",
-    "#     AL, _ = forward_propagation(X, parameters)\n",
-    "#     predictions = (AL > 0.5)  # Classify as 1 if greater than 0.5\n",
-    "#     return predictions"
+    "def evaluate_model(X, Y, params):\n",
+    "    correct_predictions = 0\n",
+    "    total_samples = X.shape[1]\n",
+    "    predictions = []\n",
+    "    actual_labels = []\n",
+    "    \n",
+    "    for i in range(total_samples):\n",
+    "        x = X[:, i:i+1]  # Get a single sample as a column vector\n",
+    "        y = Y[i]\n",
+    "        \n",
+    "        AL, _ = forward_prop(x, params)\n",
+    "        prediction = int(get_predictions(AL)[0])\n",
+    "        \n",
+    "        predictions.append(prediction)\n",
+    "        actual_labels.append(int(y))\n",
+    "        \n",
+    "        if prediction == y:\n",
+    "            correct_predictions += 1\n",
+    "    \n",
+    "    accuracy = correct_predictions / total_samples\n",
+    "    \n",
+    "    return {\n",
+    "        'accuracy': accuracy,\n",
+    "        'predictions': predictions,\n",
+    "        'actual_labels': actual_labels,\n",
+    "        'correct_predictions': correct_predictions,\n",
+    "        'total_samples': total_samples\n",
+    "    }"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "hidden_layers = [1, 2]\n",
+    "hidden_layers = [1, 2, 3, 4]\n",
     "neurons_per_layer = [64, 128, 256]\n",
     "layer_configs = list(product(*[neurons_per_layer] * max(hidden_layers)))"
    ]
@@ -264,14 +326,35 @@
     "df.to_csv('results/bel_acc.csv', index=False)\n",
     "\n",
     "# Save the weights of the best model\n",
-    "np.savez(\"weights/bel_weights\", **best_params)\n",
+    "np.savez(\"weights/bel_weights.npz\", **{k: (v.get() if hasattr(v, 'get') else v) for k, v in best_params.items()})\n",
     "\n",
     "# Evaluate on test set\n",
-    "test_AL, _ = forward_prop(X_test, best_params)\n",
+    "test_AL, _ = forward_prop(X_test, {k: xp.array(v) for k, v in best_params.items()})\n",
     "test_predictions = get_predictions(test_AL)\n",
-    "test_accuracy = get_accuracy(test_predictions, Y_test)\n",
+    "test_accuracy = float(get_accuracy(test_predictions, Y_test))\n",
     "print(f\"Test Accuracy: {test_accuracy:.4f}\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Evaluate the best model one sample at a time\n",
+    "print(\"\\nEvaluating the test set sample by sample:\")\n",
+    "test_results = evaluate_model(X_test, Y_test, best_params)\n",
+    "\n",
+    "print(f\"Test Accuracy ({test_results['total_samples']} samples): {test_results['accuracy']:.4f}\")\n",
+    "print(f\"Correct predictions: {test_results['correct_predictions']} out of {test_results['total_samples']}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {