380 lines
20 KiB
Text
380 lines
20 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import cupy as cp\n",
|
|
"\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from itertools import product"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"GPU is available. Using CuPy for GPU acceleration.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"try:\n",
|
|
" import cupy as cp\n",
|
|
" if cp.cuda.is_available():\n",
|
|
" print(\"GPU is available. Using CuPy for GPU acceleration.\")\n",
|
|
" xp = cp\n",
|
|
" else:\n",
|
|
" print(\"GPU is not available. Falling back to NumPy on CPU.\")\n",
|
|
" xp = np\n",
|
|
"except ImportError:\n",
|
|
" print(\"CuPy not found. Using NumPy on CPU.\")\n",
|
|
" xp = np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data = pd.read_csv('data/bel_data_test.csv')\n",
|
|
"data = xp.array(data)\n",
|
|
"\n",
|
|
"# Split data\n",
|
|
"X = data[:, 1:].T\n",
|
|
"Y = data[:, 0].astype(int)\n",
|
|
"\n",
|
|
"# Separate test set (first 1000 rows)\n",
|
|
"X_test = X[:, :1000]\n",
|
|
"Y_test = Y[:1000]\n",
|
|
"\n",
|
|
"# Remaining data for training and validation\n",
|
|
"X_remain = X[:, 1000:]\n",
|
|
"Y_remain = Y[1000:]\n",
|
|
"\n",
|
|
"# Split remaining data into training and validation sets\n",
|
|
"X_train, X_val, Y_train, Y_val = train_test_split(X_remain.T, Y_remain, test_size=0.2, random_state=42)\n",
|
|
"X_train, X_val = X_train.T, X_val.T"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Input layer size: 1024\n",
|
|
"Output layer size: 61\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Determine input and output layer sizes\n",
|
|
"input_size = X_train.shape[0]\n",
|
|
"output_size = len(np.unique(Y))-1\n",
|
|
"\n",
|
|
"print(f\"Input layer size: {input_size}\")\n",
|
|
"print(f\"Output layer size: {output_size}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_train, Y_train = xp.array(X_train), xp.array(Y_train)\n",
|
|
"X_val, Y_val = xp.array(X_val), xp.array(Y_val)\n",
|
|
"X_test, Y_test = xp.array(X_test), xp.array(Y_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def init_params(layer_dims):\n",
|
|
" params = {}\n",
|
|
" L = len(layer_dims)\n",
|
|
" \n",
|
|
" for l in range(1, L):\n",
|
|
" params[f'W{l}'] = xp.random.randn(layer_dims[l], layer_dims[l-1]) * xp.sqrt(2. / layer_dims[l-1])\n",
|
|
" params[f'b{l}'] = xp.zeros((layer_dims[l], 1))\n",
|
|
" \n",
|
|
" return params"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def ReLU(Z):\n",
|
|
" return xp.maximum(Z, 0)\n",
|
|
"\n",
|
|
"def softmax(Z):\n",
|
|
" A = xp.exp(Z) / sum(xp.exp(Z))\n",
|
|
" return A\n",
|
|
"\n",
|
|
"def forward_prop(X, params):\n",
|
|
" caches = []\n",
|
|
" A = X\n",
|
|
" L = len(params) // 2\n",
|
|
"\n",
|
|
" for l in range(1, L):\n",
|
|
" A_prev = A\n",
|
|
" W = params[f'W{l}']\n",
|
|
" b = params[f'b{l}']\n",
|
|
" Z = xp.dot(W, A_prev) + b\n",
|
|
" A = ReLU(Z)\n",
|
|
" caches.append((A_prev, W, b, Z))\n",
|
|
"\n",
|
|
" WL = params[f'W{L}']\n",
|
|
" bL = params[f'b{L}']\n",
|
|
" ZL = xp.dot(WL, A) + bL\n",
|
|
" AL = softmax(ZL)\n",
|
|
" caches.append((A, WL, bL, ZL))\n",
|
|
"\n",
|
|
" return AL, caches\n",
|
|
"\n",
|
|
"def ReLU_deriv(Z):\n",
|
|
" return Z > 0\n",
|
|
"\n",
|
|
"def one_hot(Y):\n",
|
|
" # one_hot_Y = xp.zeros((Y.size, Y.max() + 1))\n",
|
|
" # one_hot_Y[xp.arange(Y.size), Y] = 1\n",
|
|
" # one_hot_Y = one_hot_Y.T\n",
|
|
" # return one_hot_Y\n",
|
|
" Y = Y.astype(int)\n",
|
|
" one_hot_Y = xp.zeros((Y.size, int(xp.max(Y)) + 1))\n",
|
|
" one_hot_Y[xp.arange(Y.size), Y] = 1\n",
|
|
" return one_hot_Y.T\n",
|
|
"\n",
|
|
"def backward_prop(AL, Y, caches):\n",
|
|
" grads = {}\n",
|
|
" L = len(caches)\n",
|
|
" m = AL.shape[1]\n",
|
|
" Y = one_hot(Y)\n",
|
|
"\n",
|
|
" dAL = AL - Y\n",
|
|
" current_cache = caches[L-1]\n",
|
|
" grads[f\"dW{L}\"] = 1 / m * xp.dot(dAL, current_cache[0].T)\n",
|
|
" grads[f\"db{L}\"] = 1 / m * xp.sum(dAL, axis=1, keepdims=True)\n",
|
|
" dA_prev = xp.dot(current_cache[1].T, dAL)\n",
|
|
"\n",
|
|
" for l in reversed(range(L-1)):\n",
|
|
" current_cache = caches[l]\n",
|
|
" dZ = dA_prev * ReLU_deriv(current_cache[3])\n",
|
|
" grads[f\"dW{l+1}\"] = 1 / m * xp.dot(dZ, current_cache[0].T)\n",
|
|
" grads[f\"db{l+1}\"] = 1 / m * xp.sum(dZ, axis=1, keepdims=True)\n",
|
|
" if l > 0:\n",
|
|
" dA_prev = xp.dot(current_cache[1].T, dZ)\n",
|
|
"\n",
|
|
" return grads\n",
|
|
"\n",
|
|
"def update_params(params, grads, alpha):\n",
|
|
" L = len(params) // 2\n",
|
|
"\n",
|
|
" for l in range(1, L + 1):\n",
|
|
" params[f\"W{l}\"] -= alpha * grads[f\"dW{l}\"]\n",
|
|
" params[f\"b{l}\"] -= alpha * grads[f\"db{l}\"]\n",
|
|
"\n",
|
|
" return params\n",
|
|
"\n",
|
|
"def get_predictions(AL):\n",
|
|
" return xp.argmax(AL, axis=0)\n",
|
|
"\n",
|
|
"def get_accuracy(predictions, Y):\n",
|
|
" return xp.sum(predictions == Y) / Y.size"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def gradient_descent(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold=0.85):\n",
|
|
" params = init_params(layer_dims)\n",
|
|
" best_val_accuracy = 0\n",
|
|
" acc_store = []\n",
|
|
" \n",
|
|
" for i in range(iterations):\n",
|
|
" AL, caches = forward_prop(X_train, params)\n",
|
|
" grads = backward_prop(AL, Y_train, caches)\n",
|
|
" params = update_params(params, grads, alpha)\n",
|
|
"\n",
|
|
" if i % 100 == 0:\n",
|
|
" train_predictions = get_predictions(AL)\n",
|
|
" train_accuracy = get_accuracy(train_predictions, Y_train)\n",
|
|
" \n",
|
|
" val_AL, _ = forward_prop(X_val, params)\n",
|
|
" val_predictions = get_predictions(val_AL)\n",
|
|
" val_accuracy = get_accuracy(val_predictions, Y_val)\n",
|
|
" \n",
|
|
" print(f\"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\")\n",
|
|
" acc_store.append((train_accuracy, val_accuracy))\n",
|
|
" \n",
|
|
" if val_accuracy > best_val_accuracy:\n",
|
|
" best_val_accuracy = val_accuracy\n",
|
|
" best_params = params.copy()\n",
|
|
" \n",
|
|
" # Early stopping condition based on validation accuracy threshold\n",
|
|
" if val_accuracy >= accuracy_threshold:\n",
|
|
" print(f\"Validation accuracy threshold of {accuracy_threshold:.2f} reached. Stopping training.\")\n",
|
|
" break\n",
|
|
"\n",
|
|
" return best_params, best_val_accuracy, acc_store"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def grid_search(X_train, Y_train, X_val, Y_val, layer_configs, alpha, iterations, accuracy_threshold=0.85):\n",
|
|
" results = []\n",
|
|
" \n",
|
|
" for layer_config in layer_configs:\n",
|
|
" layer_dims = [input_size] + list(layer_config) + [output_size]\n",
|
|
" print(f\"Training architecture: {layer_dims}\")\n",
|
|
" best_params, accuracy, acc_store = gradient_descent(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold)\n",
|
|
" results.append((layer_config, accuracy, best_params, acc_store))\n",
|
|
" print(f\"Architecture {layer_dims}: Best Validation Accuracy: {accuracy:.4f}\\n\")\n",
|
|
" \n",
|
|
" return sorted(results, key=lambda x: x[1], reverse=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# def predict(X, parameters):\n",
|
|
"# AL, _ = forward_propagation(X, parameters)\n",
|
|
"# predictions = (AL > 0.5) # Classify as 1 if greater than 0.5\n",
|
|
"# return predictions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"hidden_layers = [1, 2]\n",
|
|
"neurons_per_layer = [64, 128, 256]\n",
|
|
"layer_configs = list(product(*[neurons_per_layer] * max(hidden_layers)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Performing grid search...\n",
|
|
"Training architecture: [1024, 64, 64, 61]\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "TypeError",
|
|
"evalue": "'ndarray' object cannot be interpreted as an integer",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[12], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Perform grid search\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPerforming grid search...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m best_configs \u001b[38;5;241m=\u001b[39m \u001b[43mgrid_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer_configs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTop 5 Architectures:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m config, accuracy, _, _ \u001b[38;5;129;01min\u001b[39;00m best_configs[:\u001b[38;5;241m5\u001b[39m]:\n",
|
|
"Cell \u001b[0;32mIn[9], line 7\u001b[0m, in \u001b[0;36mgrid_search\u001b[0;34m(X_train, Y_train, X_val, Y_val, layer_configs, alpha, iterations, accuracy_threshold)\u001b[0m\n\u001b[1;32m 5\u001b[0m layer_dims \u001b[38;5;241m=\u001b[39m [input_size] \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(layer_config) \u001b[38;5;241m+\u001b[39m [output_size]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTraining architecture: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlayer_dims\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 7\u001b[0m best_params, accuracy, acc_store \u001b[38;5;241m=\u001b[39m \u001b[43mgradient_descent\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer_dims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miterations\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccuracy_threshold\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m results\u001b[38;5;241m.\u001b[39mappend((layer_config, accuracy, best_params, acc_store))\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mArchitecture \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlayer_dims\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: Best Validation Accuracy: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00maccuracy\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.4f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
|
|
"Cell \u001b[0;32mIn[8], line 8\u001b[0m, in \u001b[0;36mgradient_descent\u001b[0;34m(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(iterations):\n\u001b[1;32m 7\u001b[0m AL, caches \u001b[38;5;241m=\u001b[39m forward_prop(X_train, params)\n\u001b[0;32m----> 8\u001b[0m grads \u001b[38;5;241m=\u001b[39m \u001b[43mbackward_prop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mAL\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcaches\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m params \u001b[38;5;241m=\u001b[39m update_params(params, grads, alpha)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
|
|
"Cell \u001b[0;32mIn[7], line 42\u001b[0m, in \u001b[0;36mbackward_prop\u001b[0;34m(AL, Y, caches)\u001b[0m\n\u001b[1;32m 40\u001b[0m L \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(caches)\n\u001b[1;32m 41\u001b[0m m \u001b[38;5;241m=\u001b[39m AL\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m---> 42\u001b[0m Y \u001b[38;5;241m=\u001b[39m \u001b[43mone_hot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mY\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 44\u001b[0m dAL \u001b[38;5;241m=\u001b[39m AL \u001b[38;5;241m-\u001b[39m Y\n\u001b[1;32m 45\u001b[0m current_cache \u001b[38;5;241m=\u001b[39m caches[L\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
|
|
"Cell \u001b[0;32mIn[7], line 33\u001b[0m, in \u001b[0;36mone_hot\u001b[0;34m(Y)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mone_hot\u001b[39m(Y):\n\u001b[0;32m---> 33\u001b[0m one_hot_Y \u001b[38;5;241m=\u001b[39m \u001b[43mxp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mzeros\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mY\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 34\u001b[0m one_hot_Y[xp\u001b[38;5;241m.\u001b[39marange(Y\u001b[38;5;241m.\u001b[39msize), Y] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 35\u001b[0m one_hot_Y \u001b[38;5;241m=\u001b[39m one_hot_Y\u001b[38;5;241m.\u001b[39mT\n",
|
|
"File \u001b[0;32m~/.pyenv/versions/semantics/lib/python3.12/site-packages/cupy/_creation/basic.py:248\u001b[0m, in \u001b[0;36mzeros\u001b[0;34m(shape, dtype, order)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mzeros\u001b[39m(\n\u001b[1;32m 230\u001b[0m shape: _ShapeLike,\n\u001b[1;32m 231\u001b[0m dtype: DTypeLike \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m,\n\u001b[1;32m 232\u001b[0m order: _OrderCF \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 233\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NDArray[Any]:\n\u001b[1;32m 234\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a new array of given shape and dtype, filled with zeros.\u001b[39;00m\n\u001b[1;32m 235\u001b[0m \n\u001b[1;32m 236\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 246\u001b[0m \n\u001b[1;32m 247\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 248\u001b[0m a \u001b[38;5;241m=\u001b[39m \u001b[43mcupy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mndarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m a\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mmemset_async(\u001b[38;5;241m0\u001b[39m, a\u001b[38;5;241m.\u001b[39mnbytes)\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m a\n",
|
|
"File \u001b[0;32mcupy/_core/core.pyx:137\u001b[0m, in \u001b[0;36mcupy._core.core.ndarray.__new__\u001b[0;34m()\u001b[0m\n",
|
|
"File \u001b[0;32mcupy/_core/core.pyx:202\u001b[0m, in \u001b[0;36mcupy._core.core._ndarray_base._init\u001b[0;34m()\u001b[0m\n",
|
|
"\u001b[0;31mTypeError\u001b[0m: 'ndarray' object cannot be interpreted as an integer"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Perform grid search\n",
|
|
"print(\"Performing grid search...\")\n",
|
|
"best_configs = grid_search(X_train, Y_train, X_val, Y_val, layer_configs, alpha=0.01, iterations=4000)\n",
|
|
"\n",
|
|
"print(\"\\nTop 5 Architectures:\")\n",
|
|
"for config, accuracy, _, _ in best_configs[:5]:\n",
|
|
" print(f\"Hidden Layers: {config}, Validation Accuracy: {accuracy:.4f}\")\n",
|
|
"\n",
|
|
"# Select the best configuration\n",
|
|
"best_config, best_accuracy, best_params, best_acc_store = best_configs[0]\n",
|
|
"best_layer_dims = [input_size] + list(best_config) + [output_size]\n",
|
|
"\n",
|
|
"print(f\"\\nBest architecture: {best_layer_dims}\")\n",
|
|
"print(f\"Best validation accuracy: {best_accuracy:.4f}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Save the accuracy data for the best model\n",
|
|
"df = pd.DataFrame(best_acc_store, columns=['Train Accuracy', 'Validation Accuracy'])\n",
|
|
"df.to_csv('results/bel_acc.csv', index=False)\n",
|
|
"\n",
|
|
"# Save the weights of the best model\n",
|
|
"np.savez(\"weights/bel_weights\", **best_params)\n",
|
|
"\n",
|
|
"# Evaluate on test set\n",
|
|
"test_AL, _ = forward_prop(X_test, best_params)\n",
|
|
"test_predictions = get_predictions(test_AL)\n",
|
|
"test_accuracy = get_accuracy(test_predictions, Y_test)\n",
|
|
"print(f\"Test Accuracy: {test_accuracy:.4f}\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "semantics",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|