diff --git a/bel_NN_dynamic.ipynb b/bel_NN_dynamic.ipynb index 9245661..ba9358c 100644 --- a/bel_NN_dynamic.ipynb +++ b/bel_NN_dynamic.ipynb @@ -18,17 +18,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GPU is available. Using CuPy for GPU acceleration.\n" - ] - } - ], + "outputs": [], "source": [ "try:\n", " import cupy as cp\n", @@ -71,18 +63,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input layer size: 1024\n", - "Output layer size: 61\n" - ] - } - ], + "outputs": [], "source": [ "# Determine input and output layer sizes\n", "input_size = X_train.shape[0]\n", @@ -265,14 +248,38 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "# def predict(X, parameters):\n", - "# AL, _ = forward_propagation(X, parameters)\n", - "# predictions = (AL > 0.5) # Classify as 1 if greater than 0.5\n", - "# return predictions" + "def evaluate_model(X, Y, params):\n", + " correct_predictions = 0\n", + " total_samples = X.shape[1]\n", + " predictions = []\n", + " actual_labels = []\n", + " \n", + " for i in range(total_samples):\n", + " x = X[:, i:i+1] # Get a single sample\n", + " y = Y[i]\n", + " \n", + " AL, _ = forward_prop(x, params)\n", + " prediction = int(get_predictions(AL)[0])\n", + " \n", + " predictions.append(prediction)\n", + " actual_labels.append(int(y))\n", + " \n", + " if prediction == y:\n", + " correct_predictions += 1\n", + " \n", + " accuracy = correct_predictions / total_samples\n", + " \n", + " return {\n", + " 'accuracy': accuracy,\n", + " 'predictions': predictions,\n", + " 'actual_labels': actual_labels,\n", + " 'correct_predictions': correct_predictions,\n", + " 'total_samples': total_samples\n", + " }" ] }, { @@ -281,43 +288,16 @@ "metadata": {}, "outputs": [], "source": [ - "hidden_layers = [1, 2]\n", + "hidden_layers = [1, 2, 3, 4]\n", "neurons_per_layer = [64, 128, 256]\n", "layer_configs = list(product(*[neurons_per_layer] * max(hidden_layers)))" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Performing grid search...\n", - "Training architecture: [1024, 64, 64, 61]\n" - ] - }, - { - "ename": "TypeError", - "evalue": "'ndarray' object cannot be interpreted as an integer", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[12], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Perform grid search\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPerforming grid search...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m best_configs \u001b[38;5;241m=\u001b[39m \u001b[43mgrid_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer_configs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTop 5 Architectures:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m config, accuracy, _, _ \u001b[38;5;129;01min\u001b[39;00m best_configs[:\u001b[38;5;241m5\u001b[39m]:\n", - "Cell \u001b[0;32mIn[9], line 7\u001b[0m, in \u001b[0;36mgrid_search\u001b[0;34m(X_train, Y_train, X_val, Y_val, layer_configs, alpha, iterations, accuracy_threshold)\u001b[0m\n\u001b[1;32m 5\u001b[0m layer_dims \u001b[38;5;241m=\u001b[39m [input_size] \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(layer_config) \u001b[38;5;241m+\u001b[39m [output_size]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTraining architecture: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlayer_dims\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 7\u001b[0m best_params, accuracy, acc_store \u001b[38;5;241m=\u001b[39m \u001b[43mgradient_descent\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_val\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer_dims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miterations\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccuracy_threshold\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m results\u001b[38;5;241m.\u001b[39mappend((layer_config, accuracy, best_params, acc_store))\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mArchitecture \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlayer_dims\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: Best Validation Accuracy: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00maccuracy\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.4f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "Cell \u001b[0;32mIn[8], line 8\u001b[0m, in \u001b[0;36mgradient_descent\u001b[0;34m(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(iterations):\n\u001b[1;32m 7\u001b[0m AL, caches \u001b[38;5;241m=\u001b[39m forward_prop(X_train, params)\n\u001b[0;32m----> 8\u001b[0m grads \u001b[38;5;241m=\u001b[39m \u001b[43mbackward_prop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mAL\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcaches\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m params \u001b[38;5;241m=\u001b[39m update_params(params, grads, alpha)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", - "Cell \u001b[0;32mIn[7], line 42\u001b[0m, in \u001b[0;36mbackward_prop\u001b[0;34m(AL, Y, caches)\u001b[0m\n\u001b[1;32m 40\u001b[0m L \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(caches)\n\u001b[1;32m 41\u001b[0m m \u001b[38;5;241m=\u001b[39m AL\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m---> 42\u001b[0m Y \u001b[38;5;241m=\u001b[39m \u001b[43mone_hot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mY\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 44\u001b[0m dAL \u001b[38;5;241m=\u001b[39m AL \u001b[38;5;241m-\u001b[39m Y\n\u001b[1;32m 45\u001b[0m current_cache \u001b[38;5;241m=\u001b[39m caches[L\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n", - "Cell \u001b[0;32mIn[7], line 33\u001b[0m, in \u001b[0;36mone_hot\u001b[0;34m(Y)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mone_hot\u001b[39m(Y):\n\u001b[0;32m---> 33\u001b[0m one_hot_Y \u001b[38;5;241m=\u001b[39m \u001b[43mxp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mzeros\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mY\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mY\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 34\u001b[0m one_hot_Y[xp\u001b[38;5;241m.\u001b[39marange(Y\u001b[38;5;241m.\u001b[39msize), Y] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 35\u001b[0m one_hot_Y \u001b[38;5;241m=\u001b[39m one_hot_Y\u001b[38;5;241m.\u001b[39mT\n", - "File \u001b[0;32m~/.pyenv/versions/semantics/lib/python3.12/site-packages/cupy/_creation/basic.py:248\u001b[0m, in \u001b[0;36mzeros\u001b[0;34m(shape, dtype, order)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mzeros\u001b[39m(\n\u001b[1;32m 230\u001b[0m shape: _ShapeLike,\n\u001b[1;32m 231\u001b[0m dtype: DTypeLike \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m,\n\u001b[1;32m 232\u001b[0m order: _OrderCF \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 233\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NDArray[Any]:\n\u001b[1;32m 234\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a new array of given shape and dtype, filled with zeros.\u001b[39;00m\n\u001b[1;32m 235\u001b[0m \n\u001b[1;32m 236\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 246\u001b[0m \n\u001b[1;32m 247\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 248\u001b[0m a \u001b[38;5;241m=\u001b[39m \u001b[43mcupy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mndarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m a\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mmemset_async(\u001b[38;5;241m0\u001b[39m, a\u001b[38;5;241m.\u001b[39mnbytes)\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m a\n", - "File \u001b[0;32mcupy/_core/core.pyx:137\u001b[0m, in \u001b[0;36mcupy._core.core.ndarray.__new__\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mcupy/_core/core.pyx:202\u001b[0m, in \u001b[0;36mcupy._core.core._ndarray_base._init\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: 'ndarray' object cannot be interpreted as an integer" - ] - } - ], + "outputs": [], "source": [ "# Perform grid search\n", "print(\"Performing grid search...\")\n", @@ -346,14 +326,35 @@ "df.to_csv('results/bel_acc.csv', index=False)\n", "\n", "# Save the weights of the best model\n", - "np.savez(\"weights/bel_weights\", **best_params)\n", + "np.savez(\"weights/bel_weights.npz\", **best_params)\n", "\n", "# Evaluate on test set\n", - "test_AL, _ = forward_prop(X_test, best_params)\n", + "test_AL, _ = forward_prop(X_test, {k: xp.array(v) for k, v in best_params.items()})\n", "test_predictions = get_predictions(test_AL)\n", - "test_accuracy = get_accuracy(test_predictions, Y_test)\n", + "test_accuracy = float(get_accuracy(test_predictions, Y_test))\n", "print(f\"Test Accuracy: {test_accuracy:.4f}\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use the function\n", + "print(\"\\nEvaluating on 500 test samples:\")\n", + "test_results = evaluate_model(X_test, Y_test, best_params)\n", + "\n", + "print(f\"Test Accuracy (500 samples): {test_results['accuracy']:.4f}\")\n", + "print(f\"Correct predictions: {test_results['correct_predictions']} out of {test_results['total_samples']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {