Compare commits
2 commits
1f4e31fe70...30feb25e94
| Author | SHA1 | Date |
|---|---|---|
| | 30feb25e94 | |
| | 2ea7b8c4a7 | |
1 changed file with 118 additions and 35 deletions
@@ -2,24 +2,47 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
     "\n",
     "import numpy as np\n",
     "import pandas as pd\n",
+    "import cupy as cp\n",
     "\n",
     "from sklearn.model_selection import train_test_split\n",
     "from itertools import product"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
+    "try:\n",
+    "    import cupy as cp\n",
+    "    if cp.cuda.is_available():\n",
+    "        print(\"GPU is available. Using CuPy for GPU acceleration.\")\n",
+    "        xp = cp\n",
+    "    else:\n",
+    "        print(\"GPU is not available. Falling back to NumPy on CPU.\")\n",
+    "        xp = np\n",
+    "except ImportError:\n",
+    "    print(\"CuPy not found. Using NumPy on CPU.\")\n",
+    "    xp = np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "data = pd.read_csv('data/bel_data_test.csv')\n",
-    "data = np.array(data)\n",
+    "data = xp.array(data)\n",
     "\n",
     "# Split data\n",
     "X = data[:, 1:].T\n",
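The new second cell is the heart of the change: it binds `xp` to either CuPy or NumPy once, and every later cell dispatches through that alias. A minimal standalone sketch of the same pattern (the names mirror the notebook; no new API is assumed):

```python
import numpy as np

# Bind the array backend once; all subsequent code calls xp.* instead of np./cp.
try:
    import cupy as cp
    if cp.cuda.is_available():
        xp = cp  # GPU available: CuPy mirrors most of the NumPy API
    else:
        xp = np  # CuPy installed but no usable CUDA device
except ImportError:
    xp = np      # CuPy absent: plain NumPy on the CPU

print(f"Array backend: {xp.__name__}")
```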
@@ -54,7 +77,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
+    "X_train, Y_train = xp.array(X_train), xp.array(Y_train)\n",
+    "X_val, Y_val = xp.array(X_val), xp.array(Y_val)\n",
+    "X_test, Y_test = xp.array(X_test), xp.array(Y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
@@ -63,23 +97,23 @@
     "    L = len(layer_dims)\n",
     "    \n",
     "    for l in range(1, L):\n",
-    "        params[f'W{l}'] = np.random.randn(layer_dims[l], layer_dims[l-1]) * np.sqrt(2. / layer_dims[l-1])\n",
-    "        params[f'b{l}'] = np.zeros((layer_dims[l], 1))\n",
+    "        params[f'W{l}'] = xp.random.randn(layer_dims[l], layer_dims[l-1]) * xp.sqrt(2. / layer_dims[l-1])\n",
+    "        params[f'b{l}'] = xp.zeros((layer_dims[l], 1))\n",
     "    \n",
     "    return params"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
     "def ReLU(Z):\n",
-    "    return np.maximum(Z, 0)\n",
+    "    return xp.maximum(Z, 0)\n",
     "\n",
     "def softmax(Z):\n",
-    "    A = np.exp(Z) / sum(np.exp(Z))\n",
+    "    A = xp.exp(Z) / sum(xp.exp(Z))\n",
     "    return A\n",
     "\n",
     "def forward_prop(X, params):\n",
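One thing this hunk does not change: `softmax` still divides by Python's builtin `sum` over raw exponentials, which can overflow for large logits. A numerically safer variant (an editorial sketch, not part of either commit) shifts by the per-column max and uses the backend's own reduction:

```python
def softmax(Z):
    # Z has shape (classes, samples); subtracting the column-wise max
    # keeps exp() from overflowing without changing the result
    Z_shifted = Z - xp.max(Z, axis=0, keepdims=True)
    expZ = xp.exp(Z_shifted)
    return expZ / xp.sum(expZ, axis=0, keepdims=True)
```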
@@ -91,13 +125,13 @@
     "        A_prev = A\n",
     "        W = params[f'W{l}']\n",
     "        b = params[f'b{l}']\n",
-    "        Z = np.dot(W, A_prev) + b\n",
+    "        Z = xp.dot(W, A_prev) + b\n",
     "        A = ReLU(Z)\n",
     "        caches.append((A_prev, W, b, Z))\n",
     "\n",
     "    WL = params[f'W{L}']\n",
     "    bL = params[f'b{L}']\n",
-    "    ZL = np.dot(WL, A) + bL\n",
+    "    ZL = xp.dot(WL, A) + bL\n",
     "    AL = softmax(ZL)\n",
     "    caches.append((A, WL, bL, ZL))\n",
     "\n",
@@ -107,10 +141,14 @@
     "    return Z > 0\n",
     "\n",
     "def one_hot(Y):\n",
-    "    one_hot_Y = np.zeros((Y.size, Y.max() + 1))\n",
-    "    one_hot_Y[np.arange(Y.size), Y] = 1\n",
-    "    one_hot_Y = one_hot_Y.T\n",
-    "    return one_hot_Y\n",
+    "    # one_hot_Y = xp.zeros((Y.size, Y.max() + 1))\n",
+    "    # one_hot_Y[xp.arange(Y.size), Y] = 1\n",
+    "    # one_hot_Y = one_hot_Y.T\n",
+    "    # return one_hot_Y\n",
+    "    Y = Y.astype(int)\n",
+    "    one_hot_Y = xp.zeros((Y.size, int(xp.max(Y)) + 1))\n",
+    "    one_hot_Y[xp.arange(Y.size), Y] = 1\n",
+    "    return one_hot_Y.T\n",
     "\n",
     "def backward_prop(AL, Y, caches):\n",
     "    grads = {}\n",
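The rewritten `one_hot` is more than a mechanical `np` to `xp` swap: it casts the labels to int and pulls the class count back to the host via `int(xp.max(Y))`, because a CuPy reduction returns a 0-d device array that cannot be used directly as a shape. A quick sanity check of the expected behaviour:

```python
Y = xp.array([0.0, 2.0, 1.0, 2.0])  # float labels, as they arrive from the CSV
encoded = one_hot(Y)
print(encoded.shape)  # (3, 4): one row per class, one column per sample
```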
@@ -120,17 +158,17 @@
     "\n",
     "    dAL = AL - Y\n",
     "    current_cache = caches[L-1]\n",
-    "    grads[f\"dW{L}\"] = 1 / m * np.dot(dAL, current_cache[0].T)\n",
-    "    grads[f\"db{L}\"] = 1 / m * np.sum(dAL, axis=1, keepdims=True)\n",
-    "    dA_prev = np.dot(current_cache[1].T, dAL)\n",
+    "    grads[f\"dW{L}\"] = 1 / m * xp.dot(dAL, current_cache[0].T)\n",
+    "    grads[f\"db{L}\"] = 1 / m * xp.sum(dAL, axis=1, keepdims=True)\n",
+    "    dA_prev = xp.dot(current_cache[1].T, dAL)\n",
     "\n",
     "    for l in reversed(range(L-1)):\n",
     "        current_cache = caches[l]\n",
     "        dZ = dA_prev * ReLU_deriv(current_cache[3])\n",
-    "        grads[f\"dW{l+1}\"] = 1 / m * np.dot(dZ, current_cache[0].T)\n",
-    "        grads[f\"db{l+1}\"] = 1 / m * np.sum(dZ, axis=1, keepdims=True)\n",
+    "        grads[f\"dW{l+1}\"] = 1 / m * xp.dot(dZ, current_cache[0].T)\n",
+    "        grads[f\"db{l+1}\"] = 1 / m * xp.sum(dZ, axis=1, keepdims=True)\n",
     "        if l > 0:\n",
-    "            dA_prev = np.dot(current_cache[1].T, dZ)\n",
+    "            dA_prev = xp.dot(current_cache[1].T, dZ)\n",
     "\n",
     "    return grads\n",
     "\n",
@@ -144,15 +182,15 @@
     "    return params\n",
     "\n",
     "def get_predictions(AL):\n",
-    "    return np.argmax(AL, axis=0)\n",
+    "    return xp.argmax(AL, axis=0)\n",
     "\n",
     "def get_accuracy(predictions, Y):\n",
-    "    return np.sum(predictions == Y) / Y.size"
+    "    return xp.sum(predictions == Y) / Y.size"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
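A side effect worth noting: with `xp = cp`, `get_accuracy` now returns a 0-d CuPy array rather than a Python float, which is why a later hunk in this commit wraps the call in `float(...)`. For example:

```python
preds = xp.array([1, 0, 2])
labels = xp.array([1, 0, 1])
acc = get_accuracy(preds, labels)  # 0-d array on the active backend
print(float(acc))                  # 0.666...; float() works on both backends
```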
@@ -191,7 +229,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -210,23 +248,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# def predict(X, parameters):\n",
-    "#     AL, _ = forward_propagation(X, parameters)\n",
-    "#     predictions = (AL > 0.5)  # Classify as 1 if greater than 0.5\n",
-    "#     return predictions"
+    "def evaluate_model(X, Y, params):\n",
+    "    correct_predictions = 0\n",
+    "    total_samples = X.shape[1]\n",
+    "    predictions = []\n",
+    "    actual_labels = []\n",
+    "    \n",
+    "    for i in range(total_samples):\n",
+    "        x = X[:, i:i+1]  # Get a single sample\n",
+    "        y = Y[i]\n",
+    "        \n",
+    "        AL, _ = forward_prop(x, params)\n",
+    "        prediction = int(get_predictions(AL)[0])\n",
+    "        \n",
+    "        predictions.append(prediction)\n",
+    "        actual_labels.append(int(y))\n",
+    "        \n",
+    "        if prediction == y:\n",
+    "            correct_predictions += 1\n",
+    "    \n",
+    "    accuracy = correct_predictions / total_samples\n",
+    "    \n",
+    "    return {\n",
+    "        'accuracy': accuracy,\n",
+    "        'predictions': predictions,\n",
+    "        'actual_labels': actual_labels,\n",
+    "        'correct_predictions': correct_predictions,\n",
+    "        'total_samples': total_samples\n",
+    "    }"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "hidden_layers = [1, 2]\n",
+    "hidden_layers = [1, 2, 3, 4]\n",
     "neurons_per_layer = [64, 128, 256]\n",
     "layer_configs = list(product(*[neurons_per_layer] * max(hidden_layers)))"
    ]
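The new `evaluate_model` calls `forward_prop` once per column, which is the access pattern GPUs handle worst: thousands of tiny kernel launches instead of one large matrix multiply. A batched equivalent (an editorial sketch built from the notebook's own helpers, not part of the commit) produces the same accuracy in a single pass:

```python
def evaluate_model_batched(X, Y, params):
    # One forward pass over the whole (features, samples) matrix
    AL, _ = forward_prop(X, params)
    predictions = get_predictions(AL)              # shape: (samples,)
    accuracy = float(get_accuracy(predictions, Y))
    return {'accuracy': accuracy, 'predictions': predictions}
```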
@@ -264,14 +326,35 @@
     "df.to_csv('results/bel_acc.csv', index=False)\n",
     "\n",
     "# Save the weights of the best model\n",
-    "np.savez(\"weights/bel_weights\", **best_params)\n",
+    "np.savez(\"weights/bel_weights.npz\", **best_params)\n",
     "\n",
     "# Evaluate on test set\n",
-    "test_AL, _ = forward_prop(X_test, best_params)\n",
+    "test_AL, _ = forward_prop(X_test, {k: xp.array(v) for k, v in best_params.items()})\n",
     "test_predictions = get_predictions(test_AL)\n",
-    "test_accuracy = get_accuracy(test_predictions, Y_test)\n",
+    "test_accuracy = float(get_accuracy(test_predictions, Y_test))\n",
     "print(f\"Test Accuracy: {test_accuracy:.4f}\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the function\n",
+    "print(\"\\nEvaluating on 500 test samples:\")\n",
+    "test_results = evaluate_model(X_test, Y_test, best_params)\n",
+    "\n",
+    "print(f\"Test Accuracy (500 samples): {test_results['accuracy']:.4f}\")\n",
+    "print(f\"Correct predictions: {test_results['correct_predictions']} out of {test_results['total_samples']}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
  ],
  "metadata": {
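Note on the save path in this hunk: `np.savez` cannot serialize CuPy arrays, so if the grid search left `best_params` on the GPU the save line will fail even with the corrected `.npz` name; the load side already compensates by wrapping each value in `xp.array(...)`. Assuming CuPy is importable (as in the notebook's first cell), the symmetric fix is to move the weights to the host first, since `cp.asnumpy` passes NumPy arrays through unchanged:

```python
# cp.asnumpy copies device arrays to the host and returns NumPy input as-is
host_params = {k: cp.asnumpy(v) for k, v in best_params.items()}
np.savez("weights/bel_weights.npz", **host_params)
```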