diff --git a/bel_NN_dynamic.ipynb b/bel_NN_dynamic.ipynb index f17e505..ba9358c 100644 --- a/bel_NN_dynamic.ipynb +++ b/bel_NN_dynamic.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -61,25 +61,6 @@ "X_train, X_val = X_train.T, X_val.T" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Data shapes:\")\n", - "print(f\"X_train shape: {X_train.shape}\")\n", - "print(f\"Y_train shape: {Y_train.shape}\")\n", - "print(f\"X_test shape: {X_test.shape}\")\n", - "print(f\"Y_test shape: {Y_test.shape}\")\n", - "\n", - "print(\"\\nData statistics:\")\n", - "print(f\"X_train mean: {xp.mean(X_train)}, std: {xp.std(X_train)}\")\n", - "print(f\"X_test mean: {xp.mean(X_test)}, std: {xp.std(X_test)}\")\n", - "print(f\"Unique Y_train values: {xp.unique(Y_train)}\")\n", - "print(f\"Unique Y_test values: {xp.unique(Y_test)}\")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -96,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -107,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -132,8 +113,8 @@ " return xp.maximum(Z, 0)\n", "\n", "def softmax(Z):\n", - " exp_Z = xp.exp(Z - xp.max(Z, axis=0, keepdims=True))\n", - " return exp_Z / xp.sum(exp_Z, axis=0, keepdims=True)\n", + " A = xp.exp(Z) / sum(xp.exp(Z))\n", + " return A\n", "\n", "def forward_prop(X, params):\n", " caches = []\n", @@ -209,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -232,10 +213,6 @@ " val_accuracy = get_accuracy(val_predictions, Y_val)\n", " \n", " print(f\"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\")\n", - " print(f\"Sample predictions: {train_predictions[:10]}\")\n", - " print(f\"Sample true labels: {Y_train[:10]}\")\n", - " \n", - " print(f\"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\")\n", " acc_store.append((train_accuracy, val_accuracy))\n", " \n", " if val_accuracy > best_val_accuracy:\n", @@ -252,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -271,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -307,24 +284,15 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "hidden_layers = [1, 2]\n", - "neurons_per_layer = [64, 128, 256, 512]\n", + "hidden_layers = [1, 2, 3, 4]\n", + "neurons_per_layer = [64, 128, 256]\n", "layer_configs = list(product(*[neurons_per_layer] * max(hidden_layers)))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(layer_configs)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -347,17 +315,6 @@ "print(f\"Best validation accuracy: {best_accuracy:.4f}\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"\\nModel Architecture:\")\n", - "for i in range(1, len(best_params)//2 + 1):\n", - " print(f\"Layer {i}: {best_params[f'W{i}'].shape}\")" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/bel_semantics.ipynb b/bel_semantics.ipynb index f0924cc..3904138 100644 --- a/bel_semantics.ipynb +++ b/bel_semantics.ipynb @@ -2,21 +2,15 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import torch\n", + "import net.modules\n", "\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", "\n", - "from torch.utils.data import DataLoader, TensorDataset\n", - "from sklearn.model_selection import train_test_split\n", - "from tqdm import tqdm" + "from net.transcoder import Transcoder" ] }, { @@ -25,170 +19,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Check if CUDA is available\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "print(f\"Using device: {device}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "data = pd.read_csv('data/bel_data_test.csv')\n", - "# Load the data\n", - "data = np.array(data)\n", + "filepath = 'data/bel_data_test.csv'\n", + "train_loader, test_loader, input_size = load_and_prepare_data(file_path=filepath)\n", "\n", - "# Split features and labels\n", - "X = data[:, 1:] # All columns except the first one\n", - "y = data[:, 0].astype(int) # First column as labels\n", - "\n", - "# Split the data into training and testing sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Convert to PyTorch tensors\n", - "X_train_tensor = torch.FloatTensor(X_train)\n", - "y_train_tensor = torch.LongTensor(y_train)\n", - "X_test_tensor = torch.FloatTensor(X_test)\n", - "y_test_tensor = torch.LongTensor(y_test)\n", - "\n", - "# Create DataLoader objects\n", - "train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n", - "test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n", - "\n", - "train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)\n", - "test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Define the MLP\n", - "class MLP(nn.Module):\n", - " def __init__(self):\n", - " super(MLP, self).__init__()\n", - " self.input_layer = nn.Linear(1024, 512)\n", - " self.h1_layer = nn.Linear(512, 64)\n", - " self.h2_layer = nn.Linear(64, 62)\n", - " self.relu = nn.ReLU()\n", - "\n", - " def forward(self, x):\n", - " x = self.relu(self.input_layer(x))\n", - " x = self.h1_layer(x)\n", - " x = self.h2_layer(x)\n", - " return x\n", - "\n", - "# Define the Decoder\n", - "class Decoder(nn.Module):\n", - " def __init__(self):\n", - " super(Decoder, self).__init__()\n", - " self.h2_h1 = nn.Linear(64, 512)\n", - " self.h1_input = nn.Linear(512, 1024)\n", - " self.relu = nn.ReLU()\n", - "\n", - " def forward(self, x):\n", - " x = self.relu(self.h2_h1(x))\n", - " x = self.h1_input(x)\n", - " return x\n", - "\n", - "class MLPWithDecoder(nn.Module):\n", - " def __init__(self):\n", - " super(MLPWithDecoder, self).__init__()\n", - " self.mlp = MLP()\n", - " self.decoder = Decoder()\n", - "\n", - " def forward(self, x):\n", - " # MLP forward pass\n", - " h1 = self.mlp.relu(self.mlp.input_layer(x))\n", - " h2 = self.mlp.relu(self.mlp.h1_layer(h1))\n", - " output = self.mlp.h2_layer(h2)\n", - " \n", - " # Reconstruction\n", - " reconstruction = self.decoder(h2)\n", - " \n", - " return output, reconstruction" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Function to reconstruct an image\n", - "def reconstruct_image(model, image):\n", - " model.eval()\n", - " with torch.no_grad():\n", - " _, reconstruction = model(image.unsqueeze(0))\n", - " return reconstruction.squeeze(0)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def show_image_comparison(original, reconstructed, label, prediction):\n", - " \"\"\"\n", - " Display the original and reconstructed images side by side.\n", - " \n", - " :param original: Original image (1D tensor of 1024 elements)\n", - " :param reconstructed: Reconstructed image (1D tensor of 1024 elements)\n", - " :param label: True label of the image\n", - " :param prediction: Predicted label of the image\n", - " \"\"\"\n", - " # Convert to numpy arrays and move to CPU if they're on GPU\n", - " original = original.cpu().numpy()\n", - " reconstructed = reconstructed.cpu().numpy()\n", - " \n", - " # Reshape the 1D arrays to 32x32 images\n", - " original_img = original.reshape(32, 32)\n", - " reconstructed_img = reconstructed.reshape(32, 32)\n", - " \n", - " # Create a figure with two subplots side by side\n", - " fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))\n", - " \n", - " # Show original image\n", - " ax1.imshow(original_img, cmap='gray')\n", - " ax1.set_title(f'Original (Label: {label})')\n", - " ax1.axis('off')\n", - " \n", - " # Show reconstructed image\n", - " ax2.imshow(reconstructed_img, cmap='gray')\n", - " ax2.set_title(f'Reconstructed (Predicted: {prediction})')\n", - " ax2.axis('off')\n", - " \n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize the model, loss function, and optimizer\n", - "model = MLPWithDecoder()\n", - "criterion = nn.CrossEntropyLoss()\n", - "reconstruction_criterion = nn.MSELoss()\n", - "optimizer = optim.Adam(model.parameters())\n", - "\n", - "model = model.to(device)\n", - "criterion = criterion.to(device)\n", - "reconstruction_criterion = reconstruction_criterion.to(device)" + "print(\"X_train shape:\", input_size.shape)" ] }, { @@ -197,36 +31,19 @@ "metadata": {}, "outputs": [], "source": [ - "num_epochs = 250\n", - "for epoch in range(num_epochs):\n", - " model.train() # Set the model to training mode\n", - " running_loss = 0.0\n", - " \n", - " # Use tqdm for a progress bar\n", - " with tqdm(train_loader, unit=\"batch\") as tepoch:\n", - " for images, labels in tepoch:\n", - " tepoch.set_description(f\"Epoch {epoch+1}\")\n", - " \n", - " images, labels = images.to(device), labels.to(device)\n", - " \n", - " optimizer.zero_grad()\n", - " \n", - " outputs, reconstructions = model(images)\n", - " \n", - " classification_loss = criterion(outputs, labels)\n", - " reconstruction_loss = reconstruction_criterion(reconstructions, images)\n", - " total_loss = classification_loss + reconstruction_loss\n", - " \n", - " total_loss.backward()\n", - " optimizer.step()\n", - " \n", - " running_loss += total_loss.item()\n", - " \n", - " tepoch.set_postfix(loss=total_loss.item())\n", - " \n", - " epoch_loss = running_loss / len(train_loader)\n", - " \n", - " # print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')" + "# input_size = X_train.shape[0]\n", + "# hidden_size = 128\n", + "# output_size = 61\n", + "\n", + "architecture = [input_size, [128], 61]\n", + "activations = ['leaky_relu','softmax']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize transcoder" ] }, { @@ -235,40 +52,105 @@ "metadata": {}, "outputs": [], "source": [ - "model.eval() # Set the model to evaluation mode\n", - "with torch.no_grad():\n", - " try:\n", - " # Get a batch of test data\n", - " images, labels = next(iter(test_loader))\n", - " \n", - " # Move data to the same device as the model\n", - " images = images.to(device)\n", - " labels = labels.to(device)\n", - " \n", - " # Forward pass through the model\n", - " outputs, reconstructions = model(images)\n", - " \n", - " # Get predicted labels\n", - " _, predicted = torch.max(outputs.data, 1)\n", - " \n", - " # Display the first few images in the batch\n", - " num_images_to_show = min(5, len(images))\n", - " for i in range(num_images_to_show):\n", - " show_image_comparison(\n", - " images[i], \n", - " reconstructions[i], \n", - " labels[i].item(), \n", - " predicted[i].item()\n", - " )\n", - " \n", - " # Calculate and print accuracy\n", - " correct = (predicted == labels).sum().item()\n", - " total = labels.size(0)\n", - " accuracy = 100 * correct / total\n", - " print(f'Test Accuracy: {accuracy:.2f}%')\n", - " \n", - " except Exception as e:\n", - " print(f\"An error occurred during evaluation: {str(e)}\")" + "# bl_transcoder = Transcoder(input_size, hidden_size, output_size, 'leaky_relu', 'softmax')\n", + "bl_transcoder = Transcoder(architecture, hidden_activation='relu', output_activation='softmax')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train Encoders and save weights\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Train the encoder if need\n", + "\n", + "bl_transcoder.train_model(train_loader, test_loader, learning_rate=0.001, epochs=1000)\n", + "# bl_transcoder.train_with_validation(X_train, Y_train, alpha=0.1, iterations=1000)\n", + "bl_transcoder.save_results('bt_1h128n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bl_transcoder.load_weights('weights/bt_1h128n_leaky_relu_weights.pth')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot learning curves\n", + "bl_transcoder.plot_learning_curves()\n", + "\n", + "# Visualize encoded space\n", + "bl_transcoder.plot_encoded_space(X_test, Y_test)\n", + "\n", + "print(X_test.shape)\n", + "print(X_train.shape)\n", + "# Check reconstructions\n", + "bl_transcoder.plot_reconstructions(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Transcode images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_images = 2\n", + "indices = np.random.choice(X_test.shape[1], num_images, replace=False)\n", + "\n", + "for idx in indices:\n", + " original_image = X_test[:, idx]\n", + " \n", + " # Encode the image\n", + " encoded = bl_transcoder.encode_image(original_image.reshape(-1, 1))\n", + " \n", + " # Decode the image\n", + " decoded = bl_transcoder.decode_image(encoded)\n", + "\n", + " # Visualize original, encoded, and decoded images\n", + " visualize_transcoding(original_image, encoded, decoded, idx)\n", + "\n", + " print(f\"Image {idx}:\")\n", + " print(\"Original shape:\", original_image.shape)\n", + " print(\"Encoded shape:\", encoded.shape)\n", + " print(\"Decoded shape:\", decoded.shape)\n", + " print(\"Encoded vector:\", encoded.flatten()) # Print flattened encoded vector\n", + " print(\"\\n\")" ] }, {