424 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			424 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 21,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "import os\n",
 | |
|     "\n",
 | |
|     "import numpy as np\n",
 | |
|     "import pandas as pd\n",
 | |
|     "import cupy as cp\n",
 | |
|     "\n",
 | |
|     "from sklearn.model_selection import train_test_split\n",
 | |
|     "from itertools import product"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Choose the array backend: CuPy when a CUDA device is usable, NumPy otherwise.\n",
 | |
|     "xp = np\n",
 | |
|     "try:\n",
 | |
|     "    import cupy as cp\n",
 | |
|     "    gpu_ready = cp.cuda.is_available()\n",
 | |
|     "except ImportError:\n",
 | |
|     "    print(\"CuPy not found. Using NumPy on CPU.\")\n",
 | |
|     "else:\n",
 | |
|     "    if gpu_ready:\n",
 | |
|     "        print(\"GPU is available. Using CuPy for GPU acceleration.\")\n",
 | |
|     "        xp = cp\n",
 | |
|     "    else:\n",
 | |
|     "        print(\"GPU is not available. Falling back to NumPy on CPU.\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 23,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Read the CSV and place it on the active array backend (NumPy or CuPy).\n",
 | |
|     "data = xp.array(pd.read_csv('data/bel_data_test.csv'))\n",
 | |
|     "\n",
 | |
|     "# Column 0 carries the labels; the remaining columns are the features,\n",
 | |
|     "# transposed so that every column of X is one sample.\n",
 | |
|     "Y = data[:, 0].astype(int)\n",
 | |
|     "X = data[:, 1:].T\n",
 | |
|     "\n",
 | |
|     "# The first 1000 samples are held out for testing; the rest feed train/validation.\n",
 | |
|     "n_test = 1000\n",
 | |
|     "X_test, X_remain = X[:, :n_test], X[:, n_test:]\n",
 | |
|     "Y_test, Y_remain = Y[:n_test], Y[n_test:]\n",
 | |
|     "\n",
 | |
|     "# 80/20 train/validation split. Samples are passed as rows (hence the transposes).\n",
 | |
|     "X_train, X_val, Y_train, Y_val = train_test_split(\n",
 | |
|     "    X_remain.T, Y_remain, test_size=0.2, random_state=42\n",
 | |
|     ")\n",
 | |
|     "X_train = X_train.T\n",
 | |
|     "X_val = X_val.T"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Quick sanity report on the training and test arrays.\n",
 | |
|     "named_arrays = {\"X_train\": X_train, \"Y_train\": Y_train, \"X_test\": X_test, \"Y_test\": Y_test}\n",
 | |
|     "\n",
 | |
|     "print(\"Data shapes:\")\n",
 | |
|     "for name, arr in named_arrays.items():\n",
 | |
|     "    print(f\"{name} shape: {arr.shape}\")\n",
 | |
|     "\n",
 | |
|     "print(\"\\nData statistics:\")\n",
 | |
|     "for name in (\"X_train\", \"X_test\"):\n",
 | |
|     "    arr = named_arrays[name]\n",
 | |
|     "    print(f\"{name} mean: {xp.mean(arr)}, std: {xp.std(arr)}\")\n",
 | |
|     "for name in (\"Y_train\", \"Y_test\"):\n",
 | |
|     "    print(f\"Unique {name} values: {xp.unique(named_arrays[name])}\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Layer widths derived from the data: one input unit per feature row of X_train.\n",
 | |
|     "input_size = X_train.shape[0]\n",
 | |
|     "\n",
 | |
|     "# NOTE(review): the output width is one less than the number of distinct labels in Y,\n",
 | |
|     "# while one_hot() sizes its targets as max(label) + 1 -- confirm the two agree for this dataset.\n",
 | |
|     "n_distinct_labels = len(np.unique(Y))\n",
 | |
|     "output_size = n_distinct_labels - 1\n",
 | |
|     "\n",
 | |
|     "print(f\"Input layer size: {input_size}\")\n",
 | |
|     "print(f\"Output layer size: {output_size}\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 26,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Re-wrap every split as an array of the active backend (NumPy or CuPy).\n",
 | |
|     "X_train, X_val, X_test = (xp.array(arr) for arr in (X_train, X_val, X_test))\n",
 | |
|     "Y_train, Y_val, Y_test = (xp.array(arr) for arr in (Y_train, Y_val, Y_test))"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 27,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "def init_params(layer_dims):\n",
 | |
|     "    \"\"\"Create randomly scaled weights and zero biases for a dense network.\n",
 | |
|     "\n",
 | |
|     "    layer_dims lists the layer widths, input layer first. For each layer l >= 1 the\n",
 | |
|     "    result holds 'W{l}' with shape (layer_dims[l], layer_dims[l-1]), drawn from\n",
 | |
|     "    randn and scaled by sqrt(2 / layer_dims[l-1]), and 'b{l}' as zeros of shape\n",
 | |
|     "    (layer_dims[l], 1).\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    params = {}\n",
 | |
|     "    widths_in, widths_out = layer_dims[:-1], layer_dims[1:]\n",
 | |
|     "\n",
 | |
|     "    for idx, (fan_in, fan_out) in enumerate(zip(widths_in, widths_out), start=1):\n",
 | |
|     "        scale = xp.sqrt(2. / fan_in)\n",
 | |
|     "        params[f'W{idx}'] = xp.random.randn(fan_out, fan_in) * scale\n",
 | |
|     "        params[f'b{idx}'] = xp.zeros((fan_out, 1))\n",
 | |
|     "\n",
 | |
|     "    return params"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 28,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "def ReLU(Z):\n",
 | |
|     "    \"\"\"Element-wise max(Z, 0).\"\"\"\n",
 | |
|     "    return xp.maximum(0, Z)\n",
 | |
|     "\n",
 | |
|     "def softmax(Z):\n",
 | |
|     "    \"\"\"Column-wise softmax of Z (normalised along axis 0).\"\"\"\n",
 | |
|     "    # Subtracting each column's maximum keeps exp() from overflowing.\n",
 | |
|     "    shifted = Z - xp.max(Z, axis=0, keepdims=True)\n",
 | |
|     "    exps = xp.exp(shifted)\n",
 | |
|     "    column_totals = xp.sum(exps, axis=0, keepdims=True)\n",
 | |
|     "    return exps / column_totals\n",
 | |
|     "\n",
 | |
|     "def forward_prop(X, params):\n",
 | |
|     "    \"\"\"Run X through the network: ReLU on every layer but the last, softmax on the last.\n",
 | |
|     "\n",
 | |
|     "    params maps 'W{l}' / 'b{l}' to the weights and biases of layer l.\n",
 | |
|     "    Returns (AL, caches): the output activations and, per layer, the tuple\n",
 | |
|     "    (input activation, W, b, Z) in layer order.\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    n_layers = len(params) // 2  # one W and one b per layer\n",
 | |
|     "    caches = []\n",
 | |
|     "    activation = X\n",
 | |
|     "\n",
 | |
|     "    # Hidden layers: affine transform followed by ReLU.\n",
 | |
|     "    for idx in range(1, n_layers):\n",
 | |
|     "        weights, bias = params[f'W{idx}'], params[f'b{idx}']\n",
 | |
|     "        pre_activation = xp.dot(weights, activation) + bias\n",
 | |
|     "        caches.append((activation, weights, bias, pre_activation))\n",
 | |
|     "        activation = ReLU(pre_activation)\n",
 | |
|     "\n",
 | |
|     "    # Output layer: affine transform followed by softmax.\n",
 | |
|     "    out_weights, out_bias = params[f'W{n_layers}'], params[f'b{n_layers}']\n",
 | |
|     "    out_pre_activation = xp.dot(out_weights, activation) + out_bias\n",
 | |
|     "    caches.append((activation, out_weights, out_bias, out_pre_activation))\n",
 | |
|     "\n",
 | |
|     "    return softmax(out_pre_activation), caches\n",
 | |
|     "\n",
 | |
|     "def ReLU_deriv(Z):\n",
 | |
|     "    \"\"\"Boolean mask that is True wherever Z is strictly positive.\"\"\"\n",
 | |
|     "    return 0 < Z\n",
 | |
|     "\n",
 | |
|     "def one_hot(Y):\n",
 | |
|     "    \"\"\"One-hot encode integer labels, returning shape (max(Y) + 1, Y.size).\"\"\"\n",
 | |
|     "    labels = Y.astype(int)\n",
 | |
|     "    n_samples = labels.size\n",
 | |
|     "    # The number of classes is inferred from the largest label present in Y.\n",
 | |
|     "    n_classes = int(xp.max(labels)) + 1\n",
 | |
|     "    encoded = xp.zeros((n_samples, n_classes))\n",
 | |
|     "    encoded[xp.arange(n_samples), labels] = 1\n",
 | |
|     "    return encoded.T\n",
 | |
|     "\n",
 | |
|     "def backward_prop(AL, Y, caches):\n",
 | |
|     "    \"\"\"Compute weight and bias gradients for every layer.\n",
 | |
|     "\n",
 | |
|     "    AL: output activations from forward_prop, one column per sample.\n",
 | |
|     "    Y: integer class labels, one per sample.\n",
 | |
|     "    caches: per-layer (A_prev, W, b, Z) tuples produced by forward_prop.\n",
 | |
|     "\n",
 | |
|     "    Returns a dict with 'dW{l}' and 'db{l}' for l = 1..len(caches).\n",
 | |
|     "    Raises ValueError if the labels need more classes than AL has rows.\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    grads = {}\n",
 | |
|     "    L = len(caches)\n",
 | |
|     "    n_classes, m = AL.shape\n",
 | |
|     "    Y = one_hot(Y)\n",
 | |
|     "\n",
 | |
|     "    # one_hot sizes its result from the largest label in this batch, which may not\n",
 | |
|     "    # match the network's output width. Align them explicitly instead of letting\n",
 | |
|     "    # `AL - Y` fail or silently broadcast.\n",
 | |
|     "    if Y.shape[0] > n_classes:\n",
 | |
|     "        raise ValueError(\n",
 | |
|     "            f\"labels require {Y.shape[0]} classes but the network outputs {n_classes}\"\n",
 | |
|     "        )\n",
 | |
|     "    if Y.shape[0] < n_classes:\n",
 | |
|     "        padding = xp.zeros((n_classes - Y.shape[0], Y.shape[1]))\n",
 | |
|     "        Y = xp.concatenate((Y, padding), axis=0)\n",
 | |
|     "\n",
 | |
|     "    # Output layer: the error term is the difference between outputs and targets.\n",
 | |
|     "    dAL = AL - Y\n",
 | |
|     "    current_cache = caches[L-1]\n",
 | |
|     "    grads[f\"dW{L}\"] = 1 / m * xp.dot(dAL, current_cache[0].T)\n",
 | |
|     "    grads[f\"db{L}\"] = 1 / m * xp.sum(dAL, axis=1, keepdims=True)\n",
 | |
|     "    dA_prev = xp.dot(current_cache[1].T, dAL)\n",
 | |
|     "\n",
 | |
|     "    # Hidden layers, last to first: gate the incoming error with the ReLU mask.\n",
 | |
|     "    for l in reversed(range(L-1)):\n",
 | |
|     "        current_cache = caches[l]\n",
 | |
|     "        dZ = dA_prev * ReLU_deriv(current_cache[3])\n",
 | |
|     "        grads[f\"dW{l+1}\"] = 1 / m * xp.dot(dZ, current_cache[0].T)\n",
 | |
|     "        grads[f\"db{l+1}\"] = 1 / m * xp.sum(dZ, axis=1, keepdims=True)\n",
 | |
|     "        # The first layer has no earlier layer to pass an error to.\n",
 | |
|     "        if l > 0:\n",
 | |
|     "            dA_prev = xp.dot(current_cache[1].T, dZ)\n",
 | |
|     "\n",
 | |
|     "    return grads\n",
 | |
|     "\n",
 | |
|     "def update_params(params, grads, alpha):\n",
 | |
|     "    \"\"\"Apply one gradient-descent step with learning rate alpha, in place.\n",
 | |
|     "\n",
 | |
|     "    The arrays stored in params are modified directly; the same dict is returned.\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    n_layers = len(params) // 2\n",
 | |
|     "\n",
 | |
|     "    for idx in range(1, n_layers + 1):\n",
 | |
|     "        for kind in (\"W\", \"b\"):\n",
 | |
|     "            params[f\"{kind}{idx}\"] -= alpha * grads[f\"d{kind}{idx}\"]\n",
 | |
|     "\n",
 | |
|     "    return params\n",
 | |
|     "\n",
 | |
|     "def get_predictions(AL):\n",
 | |
|     "    \"\"\"Index of the largest activation in each column of AL.\"\"\"\n",
 | |
|     "    predicted = xp.argmax(AL, axis=0)\n",
 | |
|     "    return predicted\n",
 | |
|     "\n",
 | |
|     "def get_accuracy(predictions, Y):\n",
 | |
|     "    \"\"\"Fraction of entries in predictions that equal the matching entry of Y.\"\"\"\n",
 | |
|     "    hits = xp.sum(predictions == Y)\n",
 | |
|     "    return hits / Y.size"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 29,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "def gradient_descent(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold=0.85):\n",
 | |
|     "    \"\"\"Train with full-batch gradient descent and keep the best validation snapshot.\n",
 | |
|     "\n",
 | |
|     "    Every 100 iterations the training and validation accuracy are printed and\n",
 | |
|     "    recorded; training stops early once the validation accuracy reaches\n",
 | |
|     "    accuracy_threshold.\n",
 | |
|     "\n",
 | |
|     "    Returns (best_params, best_val_accuracy, acc_store), where acc_store is a list\n",
 | |
|     "    of (train_accuracy, val_accuracy) float pairs, one per check.\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    params = init_params(layer_dims)\n",
 | |
|     "    # Start from a copy of the initial weights so best_params is always bound, even\n",
 | |
|     "    # when iterations == 0 or the validation accuracy never rises above 0.\n",
 | |
|     "    best_params = {name: value.copy() for name, value in params.items()}\n",
 | |
|     "    best_val_accuracy = 0\n",
 | |
|     "    acc_store = []\n",
 | |
|     "\n",
 | |
|     "    for i in range(iterations):\n",
 | |
|     "        AL, caches = forward_prop(X_train, params)\n",
 | |
|     "        grads = backward_prop(AL, Y_train, caches)\n",
 | |
|     "        params = update_params(params, grads, alpha)\n",
 | |
|     "\n",
 | |
|     "        if i % 100 == 0:\n",
 | |
|     "            # AL predates this iteration's update, so the training accuracy is\n",
 | |
|     "            # measured one step behind the validation accuracy.\n",
 | |
|     "            train_predictions = get_predictions(AL)\n",
 | |
|     "            # float() turns the backend scalar into a plain Python number.\n",
 | |
|     "            train_accuracy = float(get_accuracy(train_predictions, Y_train))\n",
 | |
|     "\n",
 | |
|     "            val_AL, _ = forward_prop(X_val, params)\n",
 | |
|     "            val_predictions = get_predictions(val_AL)\n",
 | |
|     "            val_accuracy = float(get_accuracy(val_predictions, Y_val))\n",
 | |
|     "\n",
 | |
|     "            print(f\"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\")\n",
 | |
|     "            print(f\"Sample predictions: {train_predictions[:10]}\")\n",
 | |
|     "            print(f\"Sample true labels: {Y_train[:10]}\")\n",
 | |
|     "            acc_store.append((train_accuracy, val_accuracy))\n",
 | |
|     "\n",
 | |
|     "            if val_accuracy > best_val_accuracy:\n",
 | |
|     "                best_val_accuracy = val_accuracy\n",
 | |
|     "                # update_params changes the arrays in place, so every array must be\n",
 | |
|     "                # copied; dict.copy() alone would keep following the live weights.\n",
 | |
|     "                best_params = {name: value.copy() for name, value in params.items()}\n",
 | |
|     "\n",
 | |
|     "            # Early stopping condition based on validation accuracy threshold\n",
 | |
|     "            if val_accuracy >= accuracy_threshold:\n",
 | |
|     "                print(f\"Validation accuracy threshold of {accuracy_threshold:.2f} reached. Stopping training.\")\n",
 | |
|     "                break\n",
 | |
|     "\n",
 | |
|     "    return best_params, best_val_accuracy, acc_store"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 30,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "def grid_search(X_train, Y_train, X_val, Y_val, layer_configs, alpha, iterations, accuracy_threshold=0.85):\n",
 | |
|     "    \"\"\"Train one network per hidden-layer configuration and rank them.\n",
 | |
|     "\n",
 | |
|     "    Each configuration is wrapped with the notebook-level input_size and\n",
 | |
|     "    output_size. Returns a list of (layer_config, accuracy, best_params,\n",
 | |
|     "    acc_store) tuples ordered by accuracy, highest first.\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    results = []\n",
 | |
|     "\n",
 | |
|     "    for layer_config in layer_configs:\n",
 | |
|     "        layer_dims = [input_size, *layer_config, output_size]\n",
 | |
|     "        print(f\"Training architecture: {layer_dims}\")\n",
 | |
|     "        best_params, accuracy, acc_store = gradient_descent(\n",
 | |
|     "            X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold\n",
 | |
|     "        )\n",
 | |
|     "        results.append((layer_config, accuracy, best_params, acc_store))\n",
 | |
|     "        print(f\"Architecture {layer_dims}: Best Validation Accuracy: {accuracy:.4f}\\n\")\n",
 | |
|     "\n",
 | |
|     "    results.sort(key=lambda entry: entry[1], reverse=True)\n",
 | |
|     "    return results"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 31,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "def evaluate_model(X, Y, params):\n",
 | |
|     "    \"\"\"Classify every column of X one at a time and compare against Y.\n",
 | |
|     "\n",
 | |
|     "    Returns a dict with the accuracy, the per-sample predictions and labels\n",
 | |
|     "    (as Python ints), the number of correct predictions and the sample count.\n",
 | |
|     "    \"\"\"\n",
 | |
|     "    total_samples = X.shape[1]\n",
 | |
|     "    predictions, actual_labels = [], []\n",
 | |
|     "    correct_predictions = 0\n",
 | |
|     "\n",
 | |
|     "    for col in range(total_samples):\n",
 | |
|     "        sample = X[:, col:col+1]  # slice keeps the column 2-D\n",
 | |
|     "        truth = Y[col]\n",
 | |
|     "\n",
 | |
|     "        probabilities, _ = forward_prop(sample, params)\n",
 | |
|     "        guess = int(get_predictions(probabilities)[0])\n",
 | |
|     "\n",
 | |
|     "        predictions.append(guess)\n",
 | |
|     "        actual_labels.append(int(truth))\n",
 | |
|     "\n",
 | |
|     "        if guess == truth:\n",
 | |
|     "            correct_predictions += 1\n",
 | |
|     "\n",
 | |
|     "    summary = {\n",
 | |
|     "        'accuracy': correct_predictions / total_samples,\n",
 | |
|     "        'predictions': predictions,\n",
 | |
|     "        'actual_labels': actual_labels,\n",
 | |
|     "        'correct_predictions': correct_predictions,\n",
 | |
|     "        'total_samples': total_samples\n",
 | |
|     "    }\n",
 | |
|     "    return summary"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 32,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Depths (number of hidden layers) and widths to search over.\n",
 | |
|     "hidden_layers = [1, 2]\n",
 | |
|     "neurons_per_layer = [64, 128, 256, 512]\n",
 | |
|     "\n",
 | |
|     "# Build every width combination for EACH listed depth; using only max(hidden_layers)\n",
 | |
|     "# would leave the shallower architectures out of the search.\n",
 | |
|     "layer_configs = [\n",
 | |
|     "    config\n",
 | |
|     "    for depth in hidden_layers\n",
 | |
|     "    for config in product(neurons_per_layer, repeat=depth)\n",
 | |
|     "]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "print(layer_configs)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Run the architecture search; grid_search returns its results sorted best-first.\n",
 | |
|     "print(\"Performing grid search...\")\n",
 | |
|     "best_configs = grid_search(\n",
 | |
|     "    X_train, Y_train, X_val, Y_val, layer_configs,\n",
 | |
|     "    alpha=0.01, iterations=4000,\n",
 | |
|     ")\n",
 | |
|     "\n",
 | |
|     "print(\"\\nTop 5 Architectures:\")\n",
 | |
|     "for config, accuracy, *_ in best_configs[:5]:\n",
 | |
|     "    print(f\"Hidden Layers: {config}, Validation Accuracy: {accuracy:.4f}\")\n",
 | |
|     "\n",
 | |
|     "# The first entry is the winning configuration.\n",
 | |
|     "best_config, best_accuracy, best_params, best_acc_store = best_configs[0]\n",
 | |
|     "best_layer_dims = [input_size, *best_config, output_size]\n",
 | |
|     "\n",
 | |
|     "print(f\"\\nBest architecture: {best_layer_dims}\")\n",
 | |
|     "print(f\"Best validation accuracy: {best_accuracy:.4f}\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# List the weight-matrix shape of every layer in the selected model.\n",
 | |
|     "print(\"\\nModel Architecture:\")\n",
 | |
|     "n_layers = len(best_params) // 2  # one W and one b per layer\n",
 | |
|     "for i in range(1, n_layers + 1):\n",
 | |
|     "    print(f\"Layer {i}: {best_params[f'W{i}'].shape}\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Make sure the output folders exist; to_csv and savez do not create missing directories.\n",
 | |
|     "os.makedirs('results', exist_ok=True)\n",
 | |
|     "os.makedirs('weights', exist_ok=True)\n",
 | |
|     "\n",
 | |
|     "# Save the accuracy data for the best model\n",
 | |
|     "df = pd.DataFrame(best_acc_store, columns=['Train Accuracy', 'Validation Accuracy'])\n",
 | |
|     "df.to_csv('results/bel_acc.csv', index=False)\n",
 | |
|     "\n",
 | |
|     "# Save the weights of the best model\n",
 | |
|     "np.savez(\"weights/bel_weights.npz\", **best_params)\n",
 | |
|     "\n",
 | |
|     "# Evaluate on test set\n",
 | |
|     "test_AL, _ = forward_prop(X_test, {k: xp.array(v) for k, v in best_params.items()})\n",
 | |
|     "test_predictions = get_predictions(test_AL)\n",
 | |
|     "# float() turns the backend scalar into a plain Python number for formatting.\n",
 | |
|     "test_accuracy = float(get_accuracy(test_predictions, Y_test))\n",
 | |
|     "print(f\"Test Accuracy: {test_accuracy:.4f}\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Per-sample evaluation over the whole held-out test set.\n",
 | |
|     "# The sample count is read from the data rather than hard-coded, so the messages\n",
 | |
|     "# stay correct whatever the size of the test split.\n",
 | |
|     "print(f\"\\nEvaluating on {X_test.shape[1]} test samples:\")\n",
 | |
|     "test_results = evaluate_model(X_test, Y_test, best_params)\n",
 | |
|     "\n",
 | |
|     "print(f\"Test Accuracy ({test_results['total_samples']} samples): {test_results['accuracy']:.4f}\")\n",
 | |
|     "print(f\"Correct predictions: {test_results['correct_predictions']} out of {test_results['total_samples']}\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "semantics",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.12.1"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 2
 | |
| }
 |