2 changed files with 131 additions and 292 deletions
--- a/bel_NN_dynamic.ipynb
+++ b/bel_NN_dynamic.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@ -37,7 +37,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@ -61,25 +61,6 @@
    "X_train, X_val = X_train.T, X_val.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Data shapes:\")\n",
    "print(f\"X_train shape: {X_train.shape}\")\n",
    "print(f\"Y_train shape: {Y_train.shape}\")\n",
    "print(f\"X_test shape: {X_test.shape}\")\n",
    "print(f\"Y_test shape: {Y_test.shape}\")\n",
    "\n",
    "print(\"\\nData statistics:\")\n",
    "print(f\"X_train mean: {xp.mean(X_train)}, std: {xp.std(X_train)}\")\n",
    "print(f\"X_test mean: {xp.mean(X_test)}, std: {xp.std(X_test)}\")\n",
    "print(f\"Unique Y_train values: {xp.unique(Y_train)}\")\n",
    "print(f\"Unique Y_test values: {xp.unique(Y_test)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -96,7 +77,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@ -107,7 +88,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@ -124,7 +105,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@ -132,8 +113,8 @@
    "    return xp.maximum(Z, 0)\n",
    "\n",
    "def softmax(Z):\n",
-    "    exp_Z = xp.exp(Z - xp.max(Z, axis=0, keepdims=True))\n",
+    "    A = xp.exp(Z) / sum(xp.exp(Z))\n",
-    "    return exp_Z / xp.sum(exp_Z, axis=0, keepdims=True)\n",
+    "    return A\n",
    "\n",
    "def forward_prop(X, params):\n",
    "    caches = []\n",
@ -209,7 +190,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@ -232,10 +213,6 @@
    "            val_accuracy = get_accuracy(val_predictions, Y_val)\n",
    "            \n",
    "            print(f\"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\")\n",
    "            print(f\"Sample predictions: {train_predictions[:10]}\")\n",
    "            print(f\"Sample true labels: {Y_train[:10]}\")\n",
    "            \n",
    "            print(f\"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\")\n",
    "            acc_store.append((train_accuracy, val_accuracy))\n",
    "            \n",
    "            if val_accuracy > best_val_accuracy:\n",
@ -252,7 +229,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@ -271,7 +248,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
@ -307,24 +284,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
-    "hidden_layers = [1, 2]\n",
+    "hidden_layers = [1, 2, 3, 4]\n",
-    "neurons_per_layer = [64, 128, 256, 512]\n",
+    "neurons_per_layer = [64, 128, 256]\n",
    "layer_configs = list(product(*[neurons_per_layer] * max(hidden_layers)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(layer_configs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -347,17 +315,6 @@
    "print(f\"Best validation accuracy: {best_accuracy:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"\\nModel Architecture:\")\n",
    "for i in range(1, len(best_params)//2 + 1):\n",
    "    print(f\"Layer {i}: {best_params[f'W{i}'].shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/bel_semantics.ipynb
+++ b/bel_semantics.ipynb
@ -2,21 +2,15 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "import torch\n",
+    "import net.modules\n",
    "\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
-    "from torch.utils.data import DataLoader, TensorDataset\n",
+    "from net.transcoder import Transcoder"
    "from sklearn.model_selection import train_test_split\n",
    "from tqdm import tqdm"
   ]
  },
  {
@ -25,170 +19,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Check if CUDA is available\n",
+    "filepath = 'data/bel_data_test.csv'\n",
-    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "train_loader, test_loader, input_size = load_and_prepare_data(file_path=filepath)\n",
    "print(f\"Using device: {device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('data/bel_data_test.csv')\n",
    "# Load the data\n",
    "data = np.array(data)\n",
    "\n",
-    "# Split features and labels\n",
+    "print(\"X_train shape:\", input_size.shape)"
    "X = data[:, 1:]  # All columns except the first one\n",
    "y = data[:, 0].astype(int)  # First column as labels\n",
    "\n",
    "# Split the data into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to PyTorch tensors\n",
    "X_train_tensor = torch.FloatTensor(X_train)\n",
    "y_train_tensor = torch.LongTensor(y_train)\n",
    "X_test_tensor = torch.FloatTensor(X_test)\n",
    "y_test_tensor = torch.LongTensor(y_test)\n",
    "\n",
    "# Create DataLoader objects\n",
    "train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n",
    "test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n",
    "\n",
    "train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)\n",
    "test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the MLP\n",
    "class MLP(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(MLP, self).__init__()\n",
    "        self.input_layer = nn.Linear(1024, 512)\n",
    "        self.h1_layer = nn.Linear(512, 64)\n",
    "        self.h2_layer = nn.Linear(64, 62)\n",
    "        self.relu = nn.ReLU()\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.relu(self.input_layer(x))\n",
    "        x = self.h1_layer(x)\n",
    "        x = self.h2_layer(x)\n",
    "        return x\n",
    "\n",
    "# Define the Decoder\n",
    "class Decoder(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Decoder, self).__init__()\n",
    "        self.h2_h1 = nn.Linear(64, 512)\n",
    "        self.h1_input = nn.Linear(512, 1024)\n",
    "        self.relu = nn.ReLU()\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.relu(self.h2_h1(x))\n",
    "        x = self.h1_input(x)\n",
    "        return x\n",
    "\n",
    "class MLPWithDecoder(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(MLPWithDecoder, self).__init__()\n",
    "        self.mlp = MLP()\n",
    "        self.decoder = Decoder()\n",
    "\n",
    "    def forward(self, x):\n",
    "        # MLP forward pass\n",
    "        h1 = self.mlp.relu(self.mlp.input_layer(x))\n",
    "        h2 = self.mlp.relu(self.mlp.h1_layer(h1))\n",
    "        output = self.mlp.h2_layer(h2)\n",
    "        \n",
    "        # Reconstruction\n",
    "        reconstruction = self.decoder(h2)\n",
    "        \n",
    "        return output, reconstruction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to reconstruct an image\n",
    "def reconstruct_image(model, image):\n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        _, reconstruction = model(image.unsqueeze(0))\n",
    "    return reconstruction.squeeze(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_image_comparison(original, reconstructed, label, prediction):\n",
    "    \"\"\"\n",
    "    Display the original and reconstructed images side by side.\n",
    "    \n",
    "    :param original: Original image (1D tensor of 1024 elements)\n",
    "    :param reconstructed: Reconstructed image (1D tensor of 1024 elements)\n",
    "    :param label: True label of the image\n",
    "    :param prediction: Predicted label of the image\n",
    "    \"\"\"\n",
    "    # Convert to numpy arrays and move to CPU if they're on GPU\n",
    "    original = original.cpu().numpy()\n",
    "    reconstructed = reconstructed.cpu().numpy()\n",
    "    \n",
    "    # Reshape the 1D arrays to 32x32 images\n",
    "    original_img = original.reshape(32, 32)\n",
    "    reconstructed_img = reconstructed.reshape(32, 32)\n",
    "    \n",
    "    # Create a figure with two subplots side by side\n",
    "    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))\n",
    "    \n",
    "    # Show original image\n",
    "    ax1.imshow(original_img, cmap='gray')\n",
    "    ax1.set_title(f'Original (Label: {label})')\n",
    "    ax1.axis('off')\n",
    "    \n",
    "    # Show reconstructed image\n",
    "    ax2.imshow(reconstructed_img, cmap='gray')\n",
    "    ax2.set_title(f'Reconstructed (Predicted: {prediction})')\n",
    "    ax2.axis('off')\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the model, loss function, and optimizer\n",
    "model = MLPWithDecoder()\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "reconstruction_criterion = nn.MSELoss()\n",
    "optimizer = optim.Adam(model.parameters())\n",
    "\n",
    "model = model.to(device)\n",
    "criterion = criterion.to(device)\n",
    "reconstruction_criterion = reconstruction_criterion.to(device)"
   ]
  },
  {
@ -197,36 +31,19 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "num_epochs = 250\n",
+    "# input_size = X_train.shape[0]\n",
-    "for epoch in range(num_epochs):\n",
+    "# hidden_size = 128\n",
-    "    model.train()  # Set the model to training mode\n",
+    "# output_size = 61\n",
-    "    running_loss = 0.0\n",
+    "\n",
-    "    \n",
+    "architecture = [input_size, [128], 61]\n",
-    "    # Use tqdm for a progress bar\n",
+    "activations = ['leaky_relu','softmax']"
-    "    with tqdm(train_loader, unit=\"batch\") as tepoch:\n",
+   ]
-    "        for images, labels in tepoch:\n",
+  },
-    "            tepoch.set_description(f\"Epoch {epoch+1}\")\n",
+  {
-    "            \n",
+   "cell_type": "markdown",
-    "            images, labels = images.to(device), labels.to(device)\n",
+   "metadata": {},
-    "            \n",
+   "source": [
-    "            optimizer.zero_grad()\n",
+    "## Initialize transcoder"
    "            \n",
    "            outputs, reconstructions = model(images)\n",
    "            \n",
    "            classification_loss = criterion(outputs, labels)\n",
    "            reconstruction_loss = reconstruction_criterion(reconstructions, images)\n",
    "            total_loss = classification_loss + reconstruction_loss\n",
    "            \n",
    "            total_loss.backward()\n",
    "            optimizer.step()\n",
    "            \n",
    "            running_loss += total_loss.item()\n",
    "            \n",
    "            tepoch.set_postfix(loss=total_loss.item())\n",
    "    \n",
    "    epoch_loss = running_loss / len(train_loader)\n",
    "    \n",
    "    # print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')"
   ]
  },
  {
@ -235,40 +52,105 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "model.eval()  # Set the model to evaluation mode\n",
+    "# bl_transcoder = Transcoder(input_size, hidden_size, output_size, 'leaky_relu', 'softmax')\n",
-    "with torch.no_grad():\n",
+    "bl_transcoder = Transcoder(architecture, hidden_activation='relu', output_activation='softmax')"
-    "    try:\n",
+   ]
-    "        # Get a batch of test data\n",
+  },
-    "        images, labels = next(iter(test_loader))\n",
+  {
-    "        \n",
+   "cell_type": "markdown",
-    "        # Move data to the same device as the model\n",
+   "metadata": {},
-    "        images = images.to(device)\n",
+   "source": [
-    "        labels = labels.to(device)\n",
+    "## Train encoders and save weights\n"
-    "        \n",
+   ]
-    "        # Forward pass through the model\n",
+  },
-    "        outputs, reconstructions = model(images)\n",
+  {
-    "        \n",
+   "cell_type": "code",
-    "        # Get predicted labels\n",
+   "execution_count": null,
-    "        _, predicted = torch.max(outputs.data, 1)\n",
+   "metadata": {},
-    "        \n",
+   "outputs": [],
-    "        # Display the first few images in the batch\n",
+   "source": [
-    "        num_images_to_show = min(5, len(images))\n",
+    "# Train the encoder if needed\n",
-    "        for i in range(num_images_to_show):\n",
+    "\n",
-    "            show_image_comparison(\n",
+    "bl_transcoder.train_model(train_loader, test_loader, learning_rate=0.001, epochs=1000)\n",
-    "                images[i], \n",
+    "# bl_transcoder.train_with_validation(X_train, Y_train, alpha=0.1, iterations=1000)\n",
-    "                reconstructions[i], \n",
+    "bl_transcoder.save_results('bt_1h128n')"
-    "                labels[i].item(), \n",
+   ]
-    "                predicted[i].item()\n",
+  },
-    "            )\n",
+  {
-    "        \n",
+   "cell_type": "markdown",
-    "        # Calculate and print accuracy\n",
+   "metadata": {},
-    "        correct = (predicted == labels).sum().item()\n",
+   "source": [
-    "        total = labels.size(0)\n",
+    "## Load weights"
-    "        accuracy = 100 * correct / total\n",
+   ]
-    "        print(f'Test Accuracy: {accuracy:.2f}%')\n",
+  },
-    "        \n",
+  {
-    "    except Exception as e:\n",
+   "cell_type": "code",
-    "        print(f\"An error occurred during evaluation: {str(e)}\")"
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bl_transcoder.load_weights('weights/bt_1h128n_leaky_relu_weights.pth')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot learning curves\n",
    "bl_transcoder.plot_learning_curves()\n",
    "\n",
    "# Visualize encoded space\n",
    "bl_transcoder.plot_encoded_space(X_test, Y_test)\n",
    "\n",
    "print(X_test.shape)\n",
    "print(X_train.shape)\n",
    "# Check reconstructions\n",
    "bl_transcoder.plot_reconstructions(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transcode images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_images = 2\n",
    "indices = np.random.choice(X_test.shape[1], num_images, replace=False)\n",
    "\n",
    "for idx in indices:\n",
    "    original_image = X_test[:, idx]\n",
    "    \n",
    "    # Encode the image\n",
    "    encoded = bl_transcoder.encode_image(original_image.reshape(-1, 1))\n",
    "    \n",
    "    # Decode the image\n",
    "    decoded = bl_transcoder.decode_image(encoded)\n",
    "\n",
    "    # Visualize original, encoded, and decoded images\n",
    "    visualize_transcoding(original_image, encoded, decoded, idx)\n",
    "\n",
    "    print(f\"Image {idx}:\")\n",
    "    print(\"Original shape:\", original_image.shape)\n",
    "    print(\"Encoded shape:\", encoded.shape)\n",
    "    print(\"Decoded shape:\", decoded.shape)\n",
    "    print(\"Encoded vector:\", encoded.flatten())  # Print flattened encoded vector\n",
    "    print(\"\\n\")"
   ]
  },
  {