just to delete from local
This commit is contained in:
parent
fa15717c84
commit
898d7348ee
2 changed files with 190 additions and 95 deletions
@@ -22,9 +22,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using device: cuda\n"
]
}
],
"source": [
"# Check if CUDA is available\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
@@ -35,18 +43,40 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train set shape: (2860, 1024)\n",
"Validation set shape: (715, 1024)\n",
"Test set shape: (1000, 1024)\n"
]
}
],
"source": [
"data = pd.read_csv('data/bel_data_test.csv')\n",
"# Load the data\n",
"data = np.array(data)\n",
"\n",
"# Split features and labels\n",
"X = data[:, 1:] # All columns except the first one\n",
"y = data[:, 0].astype(int) # First column as labels\n",
"\n",
"# Split the data into training and testing sets\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
"# Create test set from the first thousand rows\n",
"X_test = X[:1000]\n",
"y_test = y[:1000]\n",
"\n",
"# Use the remaining data for train and validation\n",
"X_remaining = X[1000:]\n",
"y_remaining = y[1000:]\n",
"\n",
"# Split the remaining data into training and validation sets\n",
"X_train, X_val, y_train, y_val = train_test_split(X_remaining, y_remaining, test_size=0.2, random_state=42)\n",
"\n",
"# Print the shapes of the resulting sets\n",
"print(f\"Train set shape: {X_train.shape}\")\n",
"print(f\"Validation set shape: {X_val.shape}\")\n",
"print(f\"Test set shape: {X_test.shape}\")"
]
},
{
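The printed shapes in the hunk above follow from the split sizes: the first 1000 rows become the test set, leaving 3575 rows, and test_size=0.2 puts 715 of those in validation and 2860 in training. A minimal sketch reproducing that arithmetic with synthetic data (the 4575x1025 shape of data/bel_data_test.csv is inferred from the printed output, not stated in the commit):

```python
import numpy as np
from sklearn.model_selection import train_test_split

# Stand-in for data/bel_data_test.csv: 1 label column + 1024 feature columns.
data = np.zeros((4575, 1025))
X, y = data[:, 1:], data[:, 0].astype(int)

X_test, y_test = X[:1000], y[:1000]    # fixed test set
X_rem, y_rem = X[1000:], y[1000:]      # 3575 rows left
X_train, X_val, y_train, y_val = train_test_split(
    X_rem, y_rem, test_size=0.2, random_state=42)

print(X_train.shape, X_val.shape, X_test.shape)
# (2860, 1024) (715, 1024) (1000, 1024), since 0.2 * 3575 = 715
```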
@@ -55,18 +85,18 @@
"metadata": {},
"outputs": [],
"source": [
"# Convert to PyTorch tensors\n",
"X_train_tensor = torch.FloatTensor(X_train)\n",
"y_train_tensor = torch.LongTensor(y_train)\n",
"X_test_tensor = torch.FloatTensor(X_test)\n",
"y_test_tensor = torch.LongTensor(y_test)\n",
"# # Convert to PyTorch tensors\n",
"# X_train_tensor = torch.FloatTensor(X_train)\n",
"# y_train_tensor = torch.LongTensor(y_train)\n",
"# X_test_tensor = torch.FloatTensor(X_test)\n",
"# y_test_tensor = torch.LongTensor(y_test)\n",
"\n",
"# Create DataLoader objects\n",
"train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n",
"test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n",
"# # Create DataLoader objects\n",
"# train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n",
"# test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n",
"\n",
"train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)\n",
"test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)"
"# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)\n",
"# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)"
]
},
{
@@ -75,45 +105,50 @@
"metadata": {},
"outputs": [],
"source": [
"class SemanticsMLP(nn.Module):\n",
"class SemanticsMLP:\n",
" def __init__(self, input_size=1024, hidden_sizes=[512, 256, 128], num_classes=62):\n",
" super(SemanticsMLP, self).__init__()\n",
" self.input_size = input_size\n",
" self.hidden_sizes = hidden_sizes\n",
" self.num_classes = num_classes\n",
"\n",
" # Encoder (feature extractor)\n",
" self.encoder_layers = nn.ModuleList()\n",
" # Initialize weights and biases\n",
" self.encoder_weights = []\n",
" self.encoder_biases = []\n",
" prev_size = input_size\n",
" for hidden_size in hidden_sizes:\n",
" self.encoder_layers.append(nn.Linear(prev_size, hidden_size))\n",
" self.encoder_weights.append(np.random.randn(prev_size, hidden_size) * np.sqrt(2. / prev_size))\n",
" self.encoder_biases.append(np.zeros(hidden_size))\n",
" prev_size = hidden_size\n",
"\n",
" # Classifier\n",
" self.classifier = nn.Linear(hidden_sizes[-1], num_classes)\n",
" self.classifier_weight = np.random.randn(hidden_sizes[-1], num_classes) * np.sqrt(2. / hidden_sizes[-1])\n",
" self.classifier_bias = np.zeros(num_classes)\n",
"\n",
" # Decoder\n",
" self.decoder_layers = nn.ModuleList()\n",
" self.decoder_weights = []\n",
" self.decoder_biases = []\n",
" reversed_hidden_sizes = list(reversed(hidden_sizes))\n",
" prev_size = hidden_sizes[-1]\n",
" for hidden_size in reversed_hidden_sizes[1:] + [input_size]:\n",
" self.decoder_layers.append(nn.Linear(prev_size, hidden_size))\n",
" self.decoder_weights.append(np.random.randn(prev_size, hidden_size) * np.sqrt(2. / prev_size))\n",
" self.decoder_biases.append(np.zeros(hidden_size))\n",
" prev_size = hidden_size\n",
"\n",
" def relu(self, x):\n",
" return np.maximum(0, x)\n",
"\n",
" def encode(self, x):\n",
" for layer in self.encoder_layers:\n",
" x = F.relu(layer(x))\n",
" for weight, bias in zip(self.encoder_weights, self.encoder_biases):\n",
" x = self.relu(np.dot(x, weight) + bias)\n",
" return x\n",
"\n",
" def decode(self, x):\n",
" for layer in self.decoder_layers[:-1]:\n",
" x = F.relu(layer(x))\n",
" x = self.decoder_layers[-1](x) # No activation on the final layer\n",
" for weight, bias in zip(self.decoder_weights[:-1], self.decoder_biases[:-1]):\n",
" x = self.relu(np.dot(x, weight) + bias)\n",
" x = np.dot(x, self.decoder_weights[-1]) + self.decoder_biases[-1] # No activation on the final layer\n",
" return x\n",
"\n",
" def forward(self, x):\n",
" encoded = self.encode(x)\n",
" logits = self.classifier(encoded)\n",
" logits = np.dot(encoded, self.classifier_weight) + self.classifier_bias\n",
" reconstructed = self.decode(encoded)\n",
" return logits, reconstructed"
]
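With input_size=1024 and hidden_sizes=[512, 256, 128], the rewritten class builds an encoder 1024 -> 512 -> 256 -> 128, a 128 -> 62 classifier, and a mirrored decoder 128 -> 256 -> 512 -> 1024. A small sketch (illustration only, reusing the constructor's loop logic) that prints the weight shapes this produces:

```python
input_size, hidden_sizes, num_classes = 1024, [512, 256, 128], 62

# Encoder weight shapes, built the same way as the __init__ loop above
encoder_shapes, prev = [], input_size
for h in hidden_sizes:
    encoder_shapes.append((prev, h))
    prev = h

# Decoder weight shapes: mirror the hidden sizes back out to the input size
decoder_shapes, prev = [], hidden_sizes[-1]
for h in list(reversed(hidden_sizes))[1:] + [input_size]:
    decoder_shapes.append((prev, h))
    prev = h

print(encoder_shapes)                   # [(1024, 512), (512, 256), (256, 128)]
print((hidden_sizes[-1], num_classes))  # (128, 62) classifier weight
print(decoder_shapes)                   # [(128, 256), (256, 512), (512, 1024)]
```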
@@ -123,6 +158,72 @@
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def softmax(x):\n",
" exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))\n",
" return exp_x / np.sum(exp_x, axis=1, keepdims=True)\n",
"\n",
"def cross_entropy_loss(y_pred, y_true):\n",
" m = y_true.shape[0]\n",
" p = softmax(y_pred)\n",
" log_likelihood = -np.log(p[range(m), y_true])\n",
" loss = np.sum(log_likelihood) / m\n",
" return loss\n",
"\n",
"def cross_entropy_gradient(y_pred, y_true):\n",
" m = y_true.shape[0]\n",
" grad = softmax(y_pred)\n",
" grad[range(m), y_true] -= 1\n",
" grad = grad / m\n",
" return grad\n",
"\n",
"def mse_loss(y_pred, y_true):\n",
" return np.mean((y_pred - y_true) ** 2)\n",
"\n",
"def mse_gradient(y_pred, y_true):\n",
" return 2 * (y_pred - y_true) / y_true.shape[0]\n",
"\n",
"def train_step(model, X, y, learning_rate):\n",
" # Forward pass\n",
" logits, reconstructed = model.forward(X)\n",
" \n",
" # Compute gradients\n",
" ce_grad = cross_entropy_gradient(logits, y)\n",
" mse_grad = mse_gradient(reconstructed, X)\n",
" \n",
" # Backpropagation (simplified, not computing full gradients for all layers)\n",
" encoded = model.encode(X)\n",
" \n",
" # Update classifier\n",
" model.classifier_weight -= learning_rate * np.dot(encoded.T, ce_grad)\n",
" model.classifier_bias -= learning_rate * np.sum(ce_grad, axis=0)\n",
" \n",
" # Update decoder (last layer only for simplicity)\n",
" decoder_grad = np.dot(encoded.T, mse_grad)\n",
" if decoder_grad.shape != model.decoder_weights[-1].shape:\n",
" raise ValueError(f\"Shape mismatch: decoder_grad {decoder_grad.shape}, decoder_weights[-1] {model.decoder_weights[-1].shape}\")\n",
" model.decoder_weights[-1] -= learning_rate * decoder_grad\n",
" model.decoder_biases[-1] -= learning_rate * np.sum(mse_grad, axis=0)\n",
" \n",
" # Compute loss\n",
" ce_loss = cross_entropy_loss(logits, y)\n",
" mse_loss_val = mse_loss(reconstructed, X)\n",
" \n",
" return ce_loss, mse_loss_val\n",
"\n",
"def evaluate(model, X, y):\n",
" logits, reconstructed = model.forward(X)\n",
" ce_loss = cross_entropy_loss(logits, y)\n",
" mse_loss_val = mse_loss(reconstructed, X)\n",
" accuracy = np.mean(np.argmax(logits, axis=1) == y)\n",
" return ce_loss, mse_loss_val, accuracy"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def show_image_comparison(original, reconstructed, label, prediction):\n",
" \"\"\"\n",
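The analytic gradient used in train_step, softmax(logits) minus a one-hot of y, divided by the batch size, can be sanity-checked against finite differences. A self-contained sketch (the tiny random batch is made up for illustration; the softmax and loss definitions mirror the cell above):

```python
import numpy as np

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def cross_entropy_loss(y_pred, y_true):
    m = y_true.shape[0]
    return np.sum(-np.log(softmax(y_pred)[range(m), y_true])) / m

def cross_entropy_gradient(y_pred, y_true):
    m = y_true.shape[0]
    grad = softmax(y_pred)
    grad[range(m), y_true] -= 1
    return grad / m

rng = np.random.default_rng(0)
logits = rng.normal(size=(4, 5))       # tiny made-up batch: 4 samples, 5 classes
labels = np.array([0, 2, 1, 4])

analytic = cross_entropy_gradient(logits, labels)
numeric = np.zeros_like(logits)
eps = 1e-6
for i in range(logits.shape[0]):
    for j in range(logits.shape[1]):
        up, down = logits.copy(), logits.copy()
        up[i, j] += eps
        down[i, j] -= eps
        numeric[i, j] = (cross_entropy_loss(up, labels) - cross_entropy_loss(down, labels)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be close to zero (roughly 1e-9 or smaller)
```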
@@ -160,14 +261,64 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"model = SemanticsMLP(input_size=1024, hidden_sizes=[10], num_classes=62).to(device)\n",
"criterion = nn.CrossEntropyLoss()\n",
"reconstruction_criterion = nn.MSELoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=0.001)"
"input_size = X_train.shape[1]\n",
"num_classes = len(np.unique(y))\n",
"model = SemanticsMLP(input_size=input_size, hidden_sizes=[512, 256, 128], num_classes=num_classes)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "operands could not be broadcast together with shapes (512,1024) (128,1024) (512,1024) ",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 17\u001b[0m\n\u001b[1;32m 14\u001b[0m X_batch \u001b[38;5;241m=\u001b[39m X_train[i:i\u001b[38;5;241m+\u001b[39mbatch_size]\n\u001b[1;32m 15\u001b[0m y_batch \u001b[38;5;241m=\u001b[39m y_train[i:i\u001b[38;5;241m+\u001b[39mbatch_size]\n\u001b[0;32m---> 17\u001b[0m ce_loss, mse_loss_val \u001b[38;5;241m=\u001b[39m \u001b[43mtrain_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_batch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_batch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlearning_rate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;66;03m# Evaluate on validation set (using test set as validation for simplicity)\u001b[39;00m\n\u001b[1;32m 20\u001b[0m val_ce_loss, val_mse_loss, val_accuracy \u001b[38;5;241m=\u001b[39m evaluate(model, X_test, y_test)\n",
"Cell \u001b[0;32mIn[6], line 41\u001b[0m, in \u001b[0;36mtrain_step\u001b[0;34m(model, X, y, learning_rate)\u001b[0m\n\u001b[1;32m 38\u001b[0m model\u001b[38;5;241m.\u001b[39mclassifier_bias \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39msum(ce_grad, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 40\u001b[0m \u001b[38;5;66;03m# Update decoder (last layer only for simplicity)\u001b[39;00m\n\u001b[0;32m---> 41\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecoder_weights\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mlearning_rate\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mencoded\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mT\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmse_grad\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 42\u001b[0m model\u001b[38;5;241m.\u001b[39mdecoder_biases[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39msum(mse_grad, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# Compute loss\u001b[39;00m\n",
"\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (512,1024) (128,1024) (512,1024) "
]
}
],
"source": [
"epochs = 100\n",
"batch_size = 32\n",
"learning_rate = 0.001\n",
"\n",
"for epoch in range(epochs):\n",
" # Shuffle the training data\n",
" indices = np.arange(X_train.shape[0])\n",
" np.random.shuffle(indices)\n",
" X_train = X_train[indices]\n",
" y_train = y_train[indices]\n",
" \n",
" # Mini-batch training\n",
" for i in range(0, X_train.shape[0], batch_size):\n",
" X_batch = X_train[i:i+batch_size]\n",
" y_batch = y_train[i:i+batch_size]\n",
" \n",
" try:\n",
" ce_loss, mse_loss_val = train_step(model, X_batch, y_batch, learning_rate)\n",
" except ValueError as e:\n",
" print(f\"Error in batch {i // batch_size}: {e}\")\n",
" print(f\"X_batch shape: {X_batch.shape}\")\n",
" print(f\"y_batch shape: {y_batch.shape}\")\n",
" raise\n",
" \n",
" # Evaluate on validation set\n",
" val_ce_loss, val_mse_loss, val_accuracy = evaluate(model, X_val, y_val)\n",
" \n",
" if epoch % 10 == 0:\n",
" print(f\"Epoch {epoch}, Val CE Loss: {val_ce_loss:.4f}, Val MSE Loss: {val_mse_loss:.4f}, Val Accuracy: {val_accuracy:.4f}\")\n",
"\n"
]
},
{
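The ValueError recorded above is a shape bug in train_step rather than a data problem: encoded has shape (batch, 128), so np.dot(encoded.T, mse_grad) is (128, 1024), while decoder_weights[-1] is (512, 1024) because the last decoder layer maps the 512-unit hidden layer back to the 1024 inputs; the in-place subtraction cannot broadcast (128, 1024) into (512, 1024). One possible repair, sketched here as an assumption rather than the committed fix, is to compute that layer's gradient from the activations that actually feed it:

```python
import numpy as np

def decoder_last_layer_grads(model, X, mse_grad):
    """Gradient of the reconstruction loss w.r.t. the last decoder layer only."""
    h = model.encode(X)                                   # (batch, 128)
    # Push the encoding through all but the last decoder layer: ends at (batch, 512)
    for w, b in zip(model.decoder_weights[:-1], model.decoder_biases[:-1]):
        h = model.relu(np.dot(h, w) + b)
    grad_w = np.dot(h.T, mse_grad)     # (512, 1024), matches decoder_weights[-1]
    grad_b = np.sum(mse_grad, axis=0)  # (1024,)
    return grad_w, grad_b

# Usage inside train_step, replacing the failing update:
#   grad_w, grad_b = decoder_last_layer_grads(model, X, mse_grad)
#   model.decoder_weights[-1] -= learning_rate * grad_w
#   model.decoder_biases[-1] -= learning_rate * grad_b
```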
@@ -176,64 +327,8 @@
"metadata": {},
"outputs": [],
"source": [
"num_epochs = 250\n",
"for epoch in range(num_epochs):\n",
" model.train()\n",
" running_loss = 0.0\n",
" \n",
" with tqdm(train_loader, unit=\"batch\") as tepoch:\n",
" for images, labels in tepoch:\n",
" tepoch.set_description(f\"Epoch {epoch+1}\")\n",
" \n",
" images, labels = images.to(device), labels.to(device)\n",
" \n",
" optimizer.zero_grad()\n",
" \n",
" logits, reconstructed = model(images)\n",
" \n",
" classification_loss = criterion(logits, labels)\n",
" reconstruction_loss = reconstruction_criterion(reconstructed, images)\n",
" total_loss = classification_loss + reconstruction_loss\n",
" \n",
" total_loss.backward()\n",
" optimizer.step()\n",
" \n",
" running_loss += total_loss.item()\n",
" \n",
" tepoch.set_postfix(loss=total_loss.item())\n",
" \n",
" epoch_loss = running_loss / len(train_loader)\n",
" # print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.eval()\n",
"with torch.no_grad():\n",
" images, labels = next(iter(test_loader))\n",
" images, labels = images.to(device), labels.to(device)\n",
" \n",
" logits, reconstructed = model(images)\n",
" \n",
" _, predicted = torch.max(logits.data, 1)\n",
" \n",
" num_images_to_show = min(5, len(images))\n",
" for i in range(num_images_to_show):\n",
" show_image_comparison(\n",
" images[i], \n",
" reconstructed[i], \n",
" labels[i].item(), \n",
" predicted[i].item()\n",
" )\n",
" \n",
" correct = (predicted == labels).sum().item()\n",
" total = labels.size(0)\n",
" accuracy = 100 * correct / total\n",
" print(f'Test Accuracy: {accuracy:.2f}%')"
"test_ce_loss, test_mse_loss, test_accuracy = evaluate(model, X_test, y_test)\n",
"print(f\"Final Test CE Loss: {test_ce_loss:.4f}, Test MSE Loss: {test_mse_loss:.4f}, Test Accuracy: {test_accuracy:.4f}\")"
]
},
{
Binary file not shown.