"""Variable-depth fully connected softmax classifier with a small grid search.

This is the code of the ``bel_NN_dynamic`` notebook cells, in cell order, as a
plain module.  Throughout, feature matrices are laid out as
``(n_features, n_samples)`` (the loader transposes the CSV) and labels are a
1-D integer vector taken from the first CSV column.
"""

from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd


def init_params(layer_dims):
    """Create He-scaled weights and zero biases for every layer.

    Args:
        layer_dims: Layer widths, input layer first and output layer last.

    Returns:
        Dict with ``W{k}`` of shape ``(layer_dims[k], layer_dims[k-1])`` and
        ``b{k}`` of shape ``(layer_dims[k], 1)`` for ``k = 1 .. len - 1``.
    """
    params = {}
    for layer in range(1, len(layer_dims)):
        fan_in = layer_dims[layer - 1]
        fan_out = layer_dims[layer]
        params[f'W{layer}'] = np.random.randn(fan_out, fan_in) * np.sqrt(2. / fan_in)
        params[f'b{layer}'] = np.zeros((fan_out, 1))
    return params


def ReLU(Z):
    """Return ``max(Z, 0)`` element-wise."""
    return np.maximum(Z, 0)


def softmax(Z):
    """Return the softmax of ``Z`` taken over axis 0 (one column per sample).

    The column maximum is subtracted before exponentiating.  This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (and the quotient from becoming NaN) for large logits.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)


def forward_prop(X, params):
    """Run the network forward: ReLU on hidden layers, softmax on the last.

    Args:
        X: Inputs, one column per sample.
        params: Parameter dict as produced by ``init_params``.

    Returns:
        ``(AL, caches)`` where ``AL`` holds the softmax outputs and ``caches``
        is a list with one ``(A_prev, W, b, Z)`` tuple per layer, in order.
    """
    caches = []
    A = X
    L = len(params) // 2  # each layer contributes one W and one b entry

    for layer in range(1, L):
        A_prev = A
        W = params[f'W{layer}']
        b = params[f'b{layer}']
        Z = np.dot(W, A_prev) + b
        A = ReLU(Z)
        caches.append((A_prev, W, b, Z))

    WL = params[f'W{L}']
    bL = params[f'b{L}']
    ZL = np.dot(WL, A) + bL
    AL = softmax(ZL)
    caches.append((A, WL, bL, ZL))

    return AL, caches


def ReLU_deriv(Z):
    """Return the boolean mask ``Z > 0`` (the ReLU derivative)."""
    return Z > 0


def one_hot(Y, num_classes=None):
    """One-hot encode integer labels as a ``(num_classes, Y.size)`` matrix.

    Args:
        Y: 1-D array of integer labels.
        num_classes: Number of rows to produce.  Defaults to ``Y.max() + 1``,
            which is the original behaviour; pass it explicitly when ``Y`` may
            not contain the highest class (e.g. a subset of the data).

    Raises:
        ValueError: If a label is ``>= num_classes``.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        num_classes = int(Y.max()) + 1
    elif Y.size and Y.max() >= num_classes:
        raise ValueError(
            f"label {int(Y.max())} does not fit into {num_classes} classes"
        )
    encoded = np.zeros((num_classes, Y.size))
    encoded[Y, np.arange(Y.size)] = 1
    return encoded


def backward_prop(AL, Y, caches):
    """Compute softmax/cross-entropy gradients for every layer.

    Args:
        AL: Softmax outputs from ``forward_prop``, one column per sample.
        Y: Integer labels, one per column of ``AL``.
        caches: The per-layer caches returned by ``forward_prop``.

    Returns:
        Dict with ``dW{k}`` and ``db{k}`` for every layer ``k``.
    """
    grads = {}
    L = len(caches)
    m = AL.shape[1]

    # Size the targets from the network output rather than from the labels
    # present in this batch, so the subtraction cannot hit a shape mismatch.
    dZ = AL - one_hot(Y, num_classes=AL.shape[0])
    dA_prev = None

    for layer in range(L, 0, -1):
        A_prev, W, _, Z = caches[layer - 1]
        if layer < L:
            # Hidden layers: push the upstream gradient through the ReLU.
            dZ = dA_prev * ReLU_deriv(Z)
        grads[f"dW{layer}"] = 1 / m * np.dot(dZ, A_prev.T)
        grads[f"db{layer}"] = 1 / m * np.sum(dZ, axis=1, keepdims=True)
        if layer > 1:
            dA_prev = np.dot(W.T, dZ)

    return grads


def update_params(params, grads, alpha):
    """Apply one gradient-descent step of size ``alpha``.

    The arrays stored in ``params`` are modified in place; the same dict is
    returned for convenience.
    """
    L = len(params) // 2

    for layer in range(1, L + 1):
        params[f"W{layer}"] -= alpha * grads[f"dW{layer}"]
        params[f"b{layer}"] -= alpha * grads[f"db{layer}"]

    return params


def get_predictions(AL):
    """Return the index of the largest output per column."""
    return np.argmax(AL, axis=0)


def get_accuracy(predictions, Y):
    """Return the fraction of ``predictions`` equal to ``Y``."""
    return np.sum(predictions == Y) / Y.size


def _snapshot(params):
    """Return a copy of ``params`` whose arrays are independent of the input."""
    return {name: value.copy() for name, value in params.items()}


def gradient_descent(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations,
                     early_stop_patience=10, eval_every=100):
    """Train with full-batch gradient descent and keep the best checkpoint.

    Every ``eval_every`` iterations the train and validation accuracy are
    printed and recorded.  Training stops early once ``early_stop_patience``
    consecutive evaluations fail to improve on the best validation accuracy.

    Args:
        X_train, Y_train: Training inputs (one column per sample) and labels.
        X_val, Y_val: Validation inputs and labels.
        layer_dims: Layer widths, input first, output last.
        alpha: Learning rate.
        iterations: Maximum number of update steps.
        early_stop_patience: Non-improving evaluations tolerated before stopping.
        eval_every: Evaluation interval in iterations.

    Returns:
        ``(best_params, best_val_accuracy, acc_store)`` where ``acc_store`` is
        a list of ``(train_accuracy, val_accuracy)`` tuples, one per evaluation.
    """
    params = init_params(layer_dims)
    best_params = None
    best_val_accuracy = 0
    patience_counter = 0
    acc_store = []

    for i in range(iterations):
        AL, caches = forward_prop(X_train, params)
        grads = backward_prop(AL, Y_train, caches)
        params = update_params(params, grads, alpha)

        if i % eval_every != 0:
            continue

        train_accuracy = get_accuracy(get_predictions(AL), Y_train)
        val_AL, _ = forward_prop(X_val, params)
        val_accuracy = get_accuracy(get_predictions(val_AL), Y_val)

        print(f"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")
        acc_store.append((train_accuracy, val_accuracy))

        improved = val_accuracy > best_val_accuracy
        if improved or best_params is None:
            # update_params mutates the arrays in place, so a shallow
            # dict.copy() would keep tracking later updates; copy the arrays.
            best_params = _snapshot(params)
        if improved:
            best_val_accuracy = val_accuracy
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

    if best_params is None:
        # No evaluation ran (iterations == 0): return the untrained parameters
        # instead of failing on an unbound name.
        best_params = _snapshot(params)

    return best_params, best_val_accuracy, acc_store


def grid_search(X_train, Y_train, X_val, Y_val, layer_configs, alpha, iterations,
                input_size=None, output_size=None):
    """Train one network per hidden-layer configuration and rank them.

    Args:
        X_train, Y_train, X_val, Y_val: Data passed on to ``gradient_descent``.
        layer_configs: Iterable of hidden-layer width tuples.
        alpha: Learning rate.
        iterations: Maximum update steps per configuration.
        input_size: Input layer width; defaults to ``X_train.shape[0]``.
        output_size: Output layer width; defaults to the largest label seen in
            the training/validation labels plus one.

    Returns:
        List of ``(layer_config, accuracy, best_params, acc_store)`` tuples
        sorted by validation accuracy, best first.
    """
    if input_size is None:
        input_size = X_train.shape[0]
    if output_size is None:
        output_size = int(max(np.max(Y_train), np.max(Y_val))) + 1

    results = []

    for layer_config in layer_configs:
        layer_dims = [input_size] + list(layer_config) + [output_size]
        print(f"Training architecture: {layer_dims}")
        best_params, accuracy, acc_store = gradient_descent(
            X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations
        )
        results.append((layer_config, accuracy, best_params, acc_store))
        print(f"Architecture {layer_dims}: Best Validation Accuracy: {accuracy:.4f}\n")

    return sorted(results, key=lambda entry: entry[1], reverse=True)


def build_layer_configs(hidden_layers, neurons_per_layer):
    """Enumerate hidden-layer width tuples for every requested depth.

    Args:
        hidden_layers: Depths (number of hidden layers) to include.
        neurons_per_layer: Candidate widths for each hidden layer.

    Returns:
        List of tuples; for each depth ``d`` in ``hidden_layers`` it contains
        every length-``d`` combination of ``neurons_per_layer``.
    """
    return [
        config
        for depth in hidden_layers
        for config in product(neurons_per_layer, repeat=depth)
    ]


def main():
    """Load the data, grid-search architectures, save and test the best model."""
    # Imported here because scikit-learn is only needed for this one split;
    # the model functions above stay importable without it.
    from sklearn.model_selection import train_test_split

    data = pd.read_csv('data/bel_data_test.csv')
    data = np.array(data)

    # Column 0 is the label; the remaining columns are features.
    X = data[:, 1:].T
    Y = data[:, 0].astype(int)

    # Separate test set (first 1000 rows)
    X_test = X[:, :1000]
    Y_test = Y[:1000]

    # Remaining data for training and validation
    X_remain = X[:, 1000:]
    Y_remain = Y[1000:]

    # train_test_split expects samples in rows, hence the transposes.
    X_train, X_val, Y_train, Y_val = train_test_split(
        X_remain.T, Y_remain, test_size=0.2, random_state=42
    )
    X_train, X_val = X_train.T, X_val.T

    # Determine input and output layer sizes
    input_size = X_train.shape[0]
    # NOTE(review): the "- 1" is kept from the original; it only matches the
    # one-hot targets if the largest label equals len(unique) - 2. Confirm
    # against the label set of the CSV.
    output_size = len(np.unique(Y))-1

    print(f"Input layer size: {input_size}")
    print(f"Output layer size: {output_size}")

    hidden_layers = [1, 2]
    neurons_per_layer = [64, 128, 256]
    layer_configs = build_layer_configs(hidden_layers, neurons_per_layer)

    # Perform grid search
    print("Performing grid search...")
    best_configs = grid_search(
        X_train, Y_train, X_val, Y_val, layer_configs,
        alpha=0.01, iterations=4000,
        input_size=input_size, output_size=output_size,
    )

    print("\nTop 5 Architectures:")
    for config, accuracy, _, _ in best_configs[:5]:
        print(f"Hidden Layers: {config}, Validation Accuracy: {accuracy:.4f}")

    # Select the best configuration
    best_config, best_accuracy, best_params, best_acc_store = best_configs[0]
    best_layer_dims = [input_size] + list(best_config) + [output_size]

    print(f"\nBest architecture: {best_layer_dims}")
    print(f"Best validation accuracy: {best_accuracy:.4f}")

    # Save the accuracy data for the best model
    Path('results').mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(best_acc_store, columns=['Train Accuracy', 'Validation Accuracy'])
    df.to_csv('results/bel_acc.csv', index=False)

    # Save the weights of the best model
    Path('weights').mkdir(parents=True, exist_ok=True)
    np.savez("weights/bel_weights", **best_params)

    # Evaluate on test set
    test_AL, _ = forward_prop(X_test, best_params)
    test_predictions = get_predictions(test_AL)
    test_accuracy = get_accuracy(test_predictions, Y_test)
    print(f"Test Accuracy: {test_accuracy:.4f}")


if __name__ == "__main__":
    main()