Initial commit

This commit is contained in:
Murtadha 2024-09-26 17:23:23 -04:00
commit 1995df58ce
21 changed files with 6708 additions and 0 deletions

View file

@ -0,0 +1,152 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e2a5d1d7-6bb3-4e24-9067-880296de1fc9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import imageio\n",
"from skimage import color, transform\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b37f1351-0a00-4a4b-9067-ea55a662bc80",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"main_folder = 'data/Bel_Training_Set/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "76f41177-fd53-4bf6-9e75-ba1a98c414ff",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"subfolders = [f for f in os.listdir(main_folder) if os.path.isdir(os.path.join(main_folder, f))]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "706d2a6d-8147-42a1-ba19-3cc7108fcfea",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"image_data = []\n",
"label_data = []"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "86841170-b9bc-46cf-b482-f2d653060bc0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"docnames = [\"Pixel \" + str(i) for i in range(1024)]\n",
"docnames.insert(0, 'Label')\n",
"df1 = pd.DataFrame(columns = docnames) "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d9e5d953-1652-47d3-a832-d71d87c2b7ee",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def add_to_dataset(x,y,z,l):\n",
" y.at[z,'Label'] = l\n",
" for i in range(0,1024):\n",
" y.at[z,docnames[i+1]] = x[i]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "82fe02ed-8471-493f-aded-58c54edb7ef6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"i = 0\n",
"for subfolder in subfolders:\n",
" subfolder_path = os.path.join(main_folder, subfolder)\n",
" for filename in os.listdir(subfolder_path):\n",
" \n",
" file_path = os.path.join(subfolder_path, filename)\n",
" if filename.lower().endswith('.ppm'):\n",
" img_array = imageio.v2.imread(file_path)\n",
" resized_img_array = transform.resize(img_array, (32, 32))\n",
" gray_img_array = color.rgb2gray(resized_img_array)\n",
" flattened_img_array = gray_img_array.flatten()\n",
" add_to_dataset(flattened_img_array,df1,i,int(subfolder))\n",
" i = i + 1\n",
" #print(\"Image From\", int(subfolder), \"Image Name\", filename)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7bdcd7d7-56f3-4b9f-924f-dd811dddf605",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df1.to_csv('bel_data_test.csv', index = False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12d9c974-85ed-4d10-af2e-0984a367d4be",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,152 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0a9a579c-df1c-4742-a0ed-e3db27bdc3e4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd \n",
"import os\n",
"import imageio\n",
"from skimage import color, transform\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "99e5b7bf-4438-477c-9880-008fb864ae56",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"image_folder = 'data/Cro_Training_Set/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "da337206-82a6-416c-b5db-04466695a7b4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def convert_letter(x):\n",
" if x == 'A':\n",
" y = 0\n",
" if x == 'B':\n",
" y = 1\n",
" if x == 'C':\n",
" y = 2\n",
" if x == 'D':\n",
" y = 3\n",
" if x == 'E':\n",
" y = 4\n",
" return y\n",
"def add_to_dataset(x,y,z,l):\n",
" y.at[z,'Label'] = l\n",
" for i in range(0,1024):\n",
" y.at[z,docnames[i+1]] = x[i]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0d28e8d0-9386-4ccd-a7bf-f19e9d59d6f6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"docnames = [\"Pixel \" + str(i) for i in range(1024)]\n",
"docnames.insert(0, 'Label')\n",
"df1 = pd.DataFrame(columns = docnames) "
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ce03500d-7d48-493f-a00c-94c1d79bf02d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"i = 0\n",
"for filename in os.listdir(image_folder):\n",
" file_path = os.path.join(image_folder, filename)\n",
"\n",
" # Check if the file is a PNG file\n",
" if filename.lower().endswith('.bmp'):\n",
" # Extract the single letter from the filename (adjust the index accordingly)\n",
" single_letter = filename[0]\n",
"\n",
" # Read the image and convert it to a NumPy array\n",
" img_array = imageio.v2.imread(file_path)\n",
"\n",
" # Resize the image to 32x32\n",
" resized_img_array = transform.resize(img_array, (32, 32))\n",
"\n",
" # Convert the RGB image to grayscale\n",
" gray_img_array = color.rgb2gray(resized_img_array)\n",
"\n",
" # Flatten the image to 1024\n",
" flattened_img_array = gray_img_array.flatten()\n",
" \n",
" label = convert_letter(single_letter)\n",
" add_to_dataset(flattened_img_array,df1,i,label)\n",
" i = i + 1\n",
"\n",
" # Append the processed image data to the list\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "02056706-60c3-4390-990a-9f84bb56c049",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df1.to_csv('cro_data_test.csv', index = False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66da76d9-6286-4e1e-b91e-007f43642c14",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,382 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e9cfe5db-43cb-4298-9388-d869d7314ea2",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9a476b58-bb18-4499-96cd-4bf38ca7566f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def img_flat(x):\n",
" gray_img = np.mean(x, axis=0)\n",
" flat_img = gray_img.flatten()\n",
" return flat_img\n",
"def add_to_dataset(x,y,z,l):\n",
" y.at[z,'Label'] = l\n",
" for i in range(0,1024):\n",
" y.at[z,docnames[i+1]] = x[i]\n",
" #print(z , \"Completed\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6130f3c5-97bd-4be8-8751-9ffbae99436b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"docnames = [\"Pixel \" + str(i) for i in range(1024)]\n",
"docnames.insert(0, 'Label')\n",
"df1 = pd.DataFrame(columns = docnames) "
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0d080bf5-067b-47a5-99dc-b22f145115b6",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_Images.zip to data/gtsrb/GTSRB_Final_Test_Images.zip\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 88978620/88978620 [00:10<00:00, 8777572.15it/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting data/gtsrb/GTSRB_Final_Test_Images.zip to data/gtsrb\n",
"Downloading https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_GT.zip to data/gtsrb/GTSRB_Final_Test_GT.zip\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 99620/99620 [00:00<00:00, 289763.24it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting data/gtsrb/GTSRB_Final_Test_GT.zip to data/gtsrb\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from torchvision.datasets import GTSRB\n",
"from torchvision import transforms\n",
"\n",
"# Define a transform to convert the data to a NumPy array\n",
"transform = transforms.Compose([\n",
" transforms.Resize((32, 32)), \n",
" transforms.ToTensor(),\n",
"])\n",
"\n",
"# Download the dataset\n",
"dataset = GTSRB(root='./data', split=\"test\", transform=transform, download=True)\n",
"\n",
"\n",
"\n",
"# Iterate through the dataset and apply transformations\n",
"for i in range(len(dataset)):\n",
" image, label = dataset[i]\n",
" label = int(label)\n",
" # Convert the PyTorch tensor to a NumPy array\n",
" image_np = np.array(image)\n",
" temp_img = img_flat(image_np)\n",
" add_to_dataset(temp_img,df1,i,label)\n",
" #data['label'].append(label)\n",
" \n",
" \n",
"# Convert the data to a DataFrame\n",
"#df = pd.DataFrame(data)\n",
"\n",
"# Save the DataFrame to a CSV file\n",
"#df.to_csv('gtsrb_data.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1b64da5c-1326-4258-8066-6ab5debfec9d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Label</th>\n",
" <th>Pixel 0</th>\n",
" <th>Pixel 1</th>\n",
" <th>Pixel 2</th>\n",
" <th>Pixel 3</th>\n",
" <th>Pixel 4</th>\n",
" <th>Pixel 5</th>\n",
" <th>Pixel 6</th>\n",
" <th>Pixel 7</th>\n",
" <th>Pixel 8</th>\n",
" <th>...</th>\n",
" <th>Pixel 1014</th>\n",
" <th>Pixel 1015</th>\n",
" <th>Pixel 1016</th>\n",
" <th>Pixel 1017</th>\n",
" <th>Pixel 1018</th>\n",
" <th>Pixel 1019</th>\n",
" <th>Pixel 1020</th>\n",
" <th>Pixel 1021</th>\n",
" <th>Pixel 1022</th>\n",
" <th>Pixel 1023</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>16</td>\n",
" <td>0.563399</td>\n",
" <td>0.556863</td>\n",
" <td>0.559477</td>\n",
" <td>0.560784</td>\n",
" <td>0.555556</td>\n",
" <td>0.550327</td>\n",
" <td>0.54902</td>\n",
" <td>0.546405</td>\n",
" <td>0.537255</td>\n",
" <td>...</td>\n",
" <td>0.551634</td>\n",
" <td>0.54902</td>\n",
" <td>0.545098</td>\n",
" <td>0.550327</td>\n",
" <td>0.554248</td>\n",
" <td>0.54902</td>\n",
" <td>0.539869</td>\n",
" <td>0.547712</td>\n",
" <td>0.551634</td>\n",
" <td>0.554248</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0.256209</td>\n",
" <td>0.303268</td>\n",
" <td>0.311111</td>\n",
" <td>0.329412</td>\n",
" <td>0.294118</td>\n",
" <td>0.304575</td>\n",
" <td>0.308497</td>\n",
" <td>0.222222</td>\n",
" <td>0.160784</td>\n",
" <td>...</td>\n",
" <td>0.865359</td>\n",
" <td>0.810458</td>\n",
" <td>0.524183</td>\n",
" <td>0.265359</td>\n",
" <td>0.201307</td>\n",
" <td>0.213072</td>\n",
" <td>0.228758</td>\n",
" <td>0.240523</td>\n",
" <td>0.27451</td>\n",
" <td>0.281046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>38</td>\n",
" <td>0.171242</td>\n",
" <td>0.166013</td>\n",
" <td>0.164706</td>\n",
" <td>0.166013</td>\n",
" <td>0.164706</td>\n",
" <td>0.15817</td>\n",
" <td>0.162092</td>\n",
" <td>0.163399</td>\n",
" <td>0.160784</td>\n",
" <td>...</td>\n",
" <td>0.150327</td>\n",
" <td>0.115033</td>\n",
" <td>0.135948</td>\n",
" <td>0.118954</td>\n",
" <td>0.115033</td>\n",
" <td>0.134641</td>\n",
" <td>0.142484</td>\n",
" <td>0.155556</td>\n",
" <td>0.169935</td>\n",
" <td>0.179085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>33</td>\n",
" <td>0.449673</td>\n",
" <td>0.329412</td>\n",
" <td>0.247059</td>\n",
" <td>0.266667</td>\n",
" <td>0.383007</td>\n",
" <td>0.532026</td>\n",
" <td>0.64183</td>\n",
" <td>0.661438</td>\n",
" <td>0.718954</td>\n",
" <td>...</td>\n",
" <td>0.477124</td>\n",
" <td>0.562092</td>\n",
" <td>0.654902</td>\n",
" <td>0.776471</td>\n",
" <td>0.738562</td>\n",
" <td>0.696732</td>\n",
" <td>0.756863</td>\n",
" <td>0.877124</td>\n",
" <td>0.946405</td>\n",
" <td>0.882353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11</td>\n",
" <td>0.132026</td>\n",
" <td>0.145098</td>\n",
" <td>0.15817</td>\n",
" <td>0.155556</td>\n",
" <td>0.150327</td>\n",
" <td>0.145098</td>\n",
" <td>0.15817</td>\n",
" <td>0.184314</td>\n",
" <td>0.203922</td>\n",
" <td>...</td>\n",
" <td>0.147712</td>\n",
" <td>0.141176</td>\n",
" <td>0.138562</td>\n",
" <td>0.145098</td>\n",
" <td>0.151634</td>\n",
" <td>0.156863</td>\n",
" <td>0.155556</td>\n",
" <td>0.162092</td>\n",
" <td>0.171242</td>\n",
" <td>0.177778</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 1025 columns</p>\n",
"</div>"
],
"text/plain": [
" Label Pixel 0 Pixel 1 Pixel 2 Pixel 3 Pixel 4 Pixel 5 Pixel 6 \\\n",
"0 16 0.563399 0.556863 0.559477 0.560784 0.555556 0.550327 0.54902 \n",
"1 1 0.256209 0.303268 0.311111 0.329412 0.294118 0.304575 0.308497 \n",
"2 38 0.171242 0.166013 0.164706 0.166013 0.164706 0.15817 0.162092 \n",
"3 33 0.449673 0.329412 0.247059 0.266667 0.383007 0.532026 0.64183 \n",
"4 11 0.132026 0.145098 0.15817 0.155556 0.150327 0.145098 0.15817 \n",
"\n",
" Pixel 7 Pixel 8 ... Pixel 1014 Pixel 1015 Pixel 1016 Pixel 1017 \\\n",
"0 0.546405 0.537255 ... 0.551634 0.54902 0.545098 0.550327 \n",
"1 0.222222 0.160784 ... 0.865359 0.810458 0.524183 0.265359 \n",
"2 0.163399 0.160784 ... 0.150327 0.115033 0.135948 0.118954 \n",
"3 0.661438 0.718954 ... 0.477124 0.562092 0.654902 0.776471 \n",
"4 0.184314 0.203922 ... 0.147712 0.141176 0.138562 0.145098 \n",
"\n",
" Pixel 1018 Pixel 1019 Pixel 1020 Pixel 1021 Pixel 1022 Pixel 1023 \n",
"0 0.554248 0.54902 0.539869 0.547712 0.551634 0.554248 \n",
"1 0.201307 0.213072 0.228758 0.240523 0.27451 0.281046 \n",
"2 0.115033 0.134641 0.142484 0.155556 0.169935 0.179085 \n",
"3 0.738562 0.696732 0.756863 0.877124 0.946405 0.882353 \n",
"4 0.151634 0.156863 0.155556 0.162092 0.171242 0.177778 \n",
"\n",
"[5 rows x 1025 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "deaf22c0-5aae-45e4-a4db-196fbcc001a1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df1.to_csv('gtsrb_data_test.csv', index = False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbc65ef5-4313-42ed-8690-557ebca488b8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}