semantics/NoNeed/Cro_Data_Loader.ipynb
2024-09-26 17:23:23 -04:00

152 lines
3.6 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0a9a579c-df1c-4742-a0ed-e3db27bdc3e4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd \n",
"import os\n",
"import imageio\n",
"from skimage import color, transform\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "99e5b7bf-4438-477c-9880-008fb864ae56",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"image_folder = 'data/Cro_Training_Set/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "da337206-82a6-416c-b5db-04466695a7b4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def convert_letter(x):\n",
" if x == 'A':\n",
" y = 0\n",
" if x == 'B':\n",
" y = 1\n",
" if x == 'C':\n",
" y = 2\n",
" if x == 'D':\n",
" y = 3\n",
" if x == 'E':\n",
" y = 4\n",
" return y\n",
"def add_to_dataset(x,y,z,l):\n",
" y.at[z,'Label'] = l\n",
" for i in range(0,1024):\n",
" y.at[z,docnames[i+1]] = x[i]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0d28e8d0-9386-4ccd-a7bf-f19e9d59d6f6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"docnames = [\"Pixel \" + str(i) for i in range(1024)]\n",
"docnames.insert(0, 'Label')\n",
"df1 = pd.DataFrame(columns = docnames) "
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ce03500d-7d48-493f-a00c-94c1d79bf02d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"i = 0\n",
"for filename in os.listdir(image_folder):\n",
" file_path = os.path.join(image_folder, filename)\n",
"\n",
" # Check if the file is a PNG file\n",
" if filename.lower().endswith('.bmp'):\n",
" # Extract the single letter from the filename (adjust the index accordingly)\n",
" single_letter = filename[0]\n",
"\n",
" # Read the image and convert it to a NumPy array\n",
" img_array = imageio.v2.imread(file_path)\n",
"\n",
" # Resize the image to 32x32\n",
" resized_img_array = transform.resize(img_array, (32, 32))\n",
"\n",
" # Convert the RGB image to grayscale\n",
" gray_img_array = color.rgb2gray(resized_img_array)\n",
"\n",
" # Flatten the image to 1024\n",
" flattened_img_array = gray_img_array.flatten()\n",
" \n",
" label = convert_letter(single_letter)\n",
" add_to_dataset(flattened_img_array,df1,i,label)\n",
" i = i + 1\n",
"\n",
" # Append the processed image data to the list\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "02056706-60c3-4390-990a-9f84bb56c049",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df1.to_csv('cro_data_test.csv', index = False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66da76d9-6286-4e1e-b91e-007f43642c14",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}