semantics/NoNeed/Germna_Data_Loader.ipynb
2024-09-26 17:23:23 -04:00

382 lines
12 KiB
Text
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e9cfe5db-43cb-4298-9388-d869d7314ea2",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9a476b58-bb18-4499-96cd-4bf38ca7566f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def img_flat(x):\n",
"    \"\"\"Collapse an image array to a single plane and flatten it to 1-D.\n",
"\n",
"    Averages `x` over axis 0 and returns the result as a flat NumPy array.\n",
"    NOTE(review): axis 0 is presumably the colour-channel axis (C, H, W) -- confirm.\n",
"    \"\"\"\n",
"    return np.mean(x, axis=0).flatten()\n",
"def add_to_dataset(x,y,z,l):\n",
"    \"\"\"Write one flattened image and its label into row `z` of DataFrame `y`.\n",
"\n",
"    x: indexable sequence holding at least 1024 pixel values.\n",
"    y: DataFrame modified in place; row `z` is created if it does not exist yet.\n",
"    z: row label to write.\n",
"    l: class label stored in the 'Label' column.\n",
"\n",
"    Column names are read from the notebook-level `docnames` list, where\n",
"    docnames[0] is 'Label' and docnames[i + 1] names pixel i.\n",
"    \"\"\"\n",
"    y.at[z, 'Label'] = l\n",
"    for i in range(1024):\n",
"        # +1 skips the leading 'Label' entry in docnames\n",
"        y.at[z, docnames[i + 1]] = x[i]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6130f3c5-97bd-4be8-8751-9ffbae99436b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Column layout: 'Label' first, followed by one column per pixel (1024 in total)\n",
"docnames = ['Label'] + ['Pixel ' + str(i) for i in range(1024)]\n",
"# Empty frame with those columns; rows are added by the loading loop\n",
"df1 = pd.DataFrame(columns=docnames)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0d080bf5-067b-47a5-99dc-b22f145115b6",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_Images.zip to data/gtsrb/GTSRB_Final_Test_Images.zip\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 88978620/88978620 [00:10<00:00, 8777572.15it/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting data/gtsrb/GTSRB_Final_Test_Images.zip to data/gtsrb\n",
"Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_GT.zip to data/gtsrb/GTSRB_Final_Test_GT.zip\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 99620/99620 [00:00<00:00, 289763.24it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting data/gtsrb/GTSRB_Final_Test_GT.zip to data/gtsrb\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from torchvision.datasets import GTSRB\n",
"from torchvision import transforms\n",
"\n",
"# Resize every image to 32x32 and convert it to a tensor\n",
"transform = transforms.Compose(\n",
"    [transforms.Resize((32, 32)), transforms.ToTensor()]\n",
")\n",
"\n",
"# Download (if necessary) and open the GTSRB test split under ./data\n",
"dataset = GTSRB(root='./data', split=\"test\", transform=transform, download=True)\n",
"\n",
"# Flatten each sample and store it, with its label, as row `i` of df1\n",
"for i in range(len(dataset)):\n",
"    image, label = dataset[i]\n",
"    # The tensor is turned into a NumPy array before averaging/flattening\n",
"    pixels = img_flat(np.array(image))\n",
"    add_to_dataset(pixels, df1, i, int(label))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1b64da5c-1326-4258-8066-6ab5debfec9d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Label</th>\n",
" <th>Pixel 0</th>\n",
" <th>Pixel 1</th>\n",
" <th>Pixel 2</th>\n",
" <th>Pixel 3</th>\n",
" <th>Pixel 4</th>\n",
" <th>Pixel 5</th>\n",
" <th>Pixel 6</th>\n",
" <th>Pixel 7</th>\n",
" <th>Pixel 8</th>\n",
" <th>...</th>\n",
" <th>Pixel 1014</th>\n",
" <th>Pixel 1015</th>\n",
" <th>Pixel 1016</th>\n",
" <th>Pixel 1017</th>\n",
" <th>Pixel 1018</th>\n",
" <th>Pixel 1019</th>\n",
" <th>Pixel 1020</th>\n",
" <th>Pixel 1021</th>\n",
" <th>Pixel 1022</th>\n",
" <th>Pixel 1023</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>16</td>\n",
" <td>0.563399</td>\n",
" <td>0.556863</td>\n",
" <td>0.559477</td>\n",
" <td>0.560784</td>\n",
" <td>0.555556</td>\n",
" <td>0.550327</td>\n",
" <td>0.54902</td>\n",
" <td>0.546405</td>\n",
" <td>0.537255</td>\n",
" <td>...</td>\n",
" <td>0.551634</td>\n",
" <td>0.54902</td>\n",
" <td>0.545098</td>\n",
" <td>0.550327</td>\n",
" <td>0.554248</td>\n",
" <td>0.54902</td>\n",
" <td>0.539869</td>\n",
" <td>0.547712</td>\n",
" <td>0.551634</td>\n",
" <td>0.554248</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0.256209</td>\n",
" <td>0.303268</td>\n",
" <td>0.311111</td>\n",
" <td>0.329412</td>\n",
" <td>0.294118</td>\n",
" <td>0.304575</td>\n",
" <td>0.308497</td>\n",
" <td>0.222222</td>\n",
" <td>0.160784</td>\n",
" <td>...</td>\n",
" <td>0.865359</td>\n",
" <td>0.810458</td>\n",
" <td>0.524183</td>\n",
" <td>0.265359</td>\n",
" <td>0.201307</td>\n",
" <td>0.213072</td>\n",
" <td>0.228758</td>\n",
" <td>0.240523</td>\n",
" <td>0.27451</td>\n",
" <td>0.281046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>38</td>\n",
" <td>0.171242</td>\n",
" <td>0.166013</td>\n",
" <td>0.164706</td>\n",
" <td>0.166013</td>\n",
" <td>0.164706</td>\n",
" <td>0.15817</td>\n",
" <td>0.162092</td>\n",
" <td>0.163399</td>\n",
" <td>0.160784</td>\n",
" <td>...</td>\n",
" <td>0.150327</td>\n",
" <td>0.115033</td>\n",
" <td>0.135948</td>\n",
" <td>0.118954</td>\n",
" <td>0.115033</td>\n",
" <td>0.134641</td>\n",
" <td>0.142484</td>\n",
" <td>0.155556</td>\n",
" <td>0.169935</td>\n",
" <td>0.179085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>33</td>\n",
" <td>0.449673</td>\n",
" <td>0.329412</td>\n",
" <td>0.247059</td>\n",
" <td>0.266667</td>\n",
" <td>0.383007</td>\n",
" <td>0.532026</td>\n",
" <td>0.64183</td>\n",
" <td>0.661438</td>\n",
" <td>0.718954</td>\n",
" <td>...</td>\n",
" <td>0.477124</td>\n",
" <td>0.562092</td>\n",
" <td>0.654902</td>\n",
" <td>0.776471</td>\n",
" <td>0.738562</td>\n",
" <td>0.696732</td>\n",
" <td>0.756863</td>\n",
" <td>0.877124</td>\n",
" <td>0.946405</td>\n",
" <td>0.882353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11</td>\n",
" <td>0.132026</td>\n",
" <td>0.145098</td>\n",
" <td>0.15817</td>\n",
" <td>0.155556</td>\n",
" <td>0.150327</td>\n",
" <td>0.145098</td>\n",
" <td>0.15817</td>\n",
" <td>0.184314</td>\n",
" <td>0.203922</td>\n",
" <td>...</td>\n",
" <td>0.147712</td>\n",
" <td>0.141176</td>\n",
" <td>0.138562</td>\n",
" <td>0.145098</td>\n",
" <td>0.151634</td>\n",
" <td>0.156863</td>\n",
" <td>0.155556</td>\n",
" <td>0.162092</td>\n",
" <td>0.171242</td>\n",
" <td>0.177778</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 1025 columns</p>\n",
"</div>"
],
"text/plain": [
" Label Pixel 0 Pixel 1 Pixel 2 Pixel 3 Pixel 4 Pixel 5 Pixel 6 \\\n",
"0 16 0.563399 0.556863 0.559477 0.560784 0.555556 0.550327 0.54902 \n",
"1 1 0.256209 0.303268 0.311111 0.329412 0.294118 0.304575 0.308497 \n",
"2 38 0.171242 0.166013 0.164706 0.166013 0.164706 0.15817 0.162092 \n",
"3 33 0.449673 0.329412 0.247059 0.266667 0.383007 0.532026 0.64183 \n",
"4 11 0.132026 0.145098 0.15817 0.155556 0.150327 0.145098 0.15817 \n",
"\n",
" Pixel 7 Pixel 8 ... Pixel 1014 Pixel 1015 Pixel 1016 Pixel 1017 \\\n",
"0 0.546405 0.537255 ... 0.551634 0.54902 0.545098 0.550327 \n",
"1 0.222222 0.160784 ... 0.865359 0.810458 0.524183 0.265359 \n",
"2 0.163399 0.160784 ... 0.150327 0.115033 0.135948 0.118954 \n",
"3 0.661438 0.718954 ... 0.477124 0.562092 0.654902 0.776471 \n",
"4 0.184314 0.203922 ... 0.147712 0.141176 0.138562 0.145098 \n",
"\n",
" Pixel 1018 Pixel 1019 Pixel 1020 Pixel 1021 Pixel 1022 Pixel 1023 \n",
"0 0.554248 0.54902 0.539869 0.547712 0.551634 0.554248 \n",
"1 0.201307 0.213072 0.228758 0.240523 0.27451 0.281046 \n",
"2 0.115033 0.134641 0.142484 0.155556 0.169935 0.179085 \n",
"3 0.738562 0.696732 0.756863 0.877124 0.946405 0.882353 \n",
"4 0.151634 0.156863 0.155556 0.162092 0.171242 0.177778 \n",
"\n",
"[5 rows x 1025 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows to sanity-check the labels and pixel values\n",
"df1.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "deaf22c0-5aae-45e4-a4db-196fbcc001a1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Save the table to CSV; index=False leaves the row index out of the file\n",
"df1.to_csv('gtsrb_data_test.csv', index = False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbc65ef5-4313-42ed-8690-557ebca488b8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}