152 lines
3.6 KiB
Text
152 lines
3.6 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "0a9a579c-df1c-4742-a0ed-e3db27bdc3e4",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd \n",
|
|
"import os\n",
|
|
"import imageio\n",
|
|
"from skimage import color, transform\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "99e5b7bf-4438-477c-9880-008fb864ae56",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Relative path of the folder whose .bmp files are read further down\n",
"image_folder = 'data/Cro_Training_Set/'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "da337206-82a6-416c-b5db-04466695a7b4",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"def convert_letter(x):\n",
"    \"\"\"Map a class letter ('A'-'E') to its integer label (0-4).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : str\n",
"        Class letter; the notebook passes the first character of an image\n",
"        filename.\n",
"\n",
"    Returns\n",
"    -------\n",
"    int\n",
"        0 for 'A', 1 for 'B', 2 for 'C', 3 for 'D', 4 for 'E'.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If x is not one of the five supported letters.\n",
"    \"\"\"\n",
"    letter_to_label = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}\n",
"    try:\n",
"        return letter_to_label[x]\n",
"    except (KeyError, TypeError):\n",
"        # Fail with a clear message instead of returning an unassigned local\n",
"        raise ValueError(f\"Unknown class letter {x!r}; expected one of 'A'-'E'\") from None\n",
|
"def add_to_dataset(x, y, z, l):\n",
"    \"\"\"Write one labelled sample into row z of the table y (in place).\n",
"\n",
"    x: indexable sequence of pixel values; entries 0..1023 are read.\n",
"    y: pandas DataFrame that receives the values.\n",
"    z: row label of the sample.\n",
"    l: value stored in the 'Label' column.\n",
"\n",
"    The pixel column names are taken from positions 1..1024 of the global\n",
"    list docnames, which must exist by the time this function is called.\n",
"    \"\"\"\n",
"    y.at[z, 'Label'] = l\n",
"    pixel_idx = 0\n",
"    while pixel_idx < 1024:\n",
"        column = docnames[pixel_idx + 1]\n",
"        y.at[z, column] = x[pixel_idx]\n",
"        pixel_idx += 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "0d28e8d0-9386-4ccd-a7bf-f19e9d59d6f6",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Column layout: 'Label' first, then one column per pixel (Pixel 0 .. Pixel 1023)\n",
"docnames = ['Label'] + [f'Pixel {n}' for n in range(1024)]\n",
"df1 = pd.DataFrame(columns=docnames)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "ce03500d-7d48-493f-a00c-94c1d79bf02d",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# os.listdir() returns names in arbitrary order; sorting keeps the row order\n",
"# of df1 (and of the CSV written from it) reproducible between runs.\n",
"i = 0\n",
"for filename in sorted(os.listdir(image_folder)):\n",
"    # Only BMP files are processed; every other directory entry is skipped\n",
"    if not filename.lower().endswith('.bmp'):\n",
"        continue\n",
"\n",
"    file_path = os.path.join(image_folder, filename)\n",
"\n",
"    # The first character of the filename is used as the class letter\n",
"    single_letter = filename[0]\n",
"\n",
"    # Read the image and convert it to a NumPy array\n",
"    img_array = imageio.v2.imread(file_path)\n",
"\n",
"    # Resize the image to 32x32\n",
"    resized_img_array = transform.resize(img_array, (32, 32))\n",
"\n",
"    # Convert the RGB image to grayscale\n",
"    gray_img_array = color.rgb2gray(resized_img_array)\n",
"\n",
"    # Flatten the 32x32 image to 1024 values\n",
"    flattened_img_array = gray_img_array.flatten()\n",
"\n",
"    # Store the sample as row i of df1, then advance the row counter\n",
"    label = convert_letter(single_letter)\n",
"    add_to_dataset(flattened_img_array, df1, i, label)\n",
"    i = i + 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "02056706-60c3-4390-990a-9f84bb56c049",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Write the collected samples to CSV, leaving out the index column\n",
"df1.to_csv('cro_data_test.csv', index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "66da76d9-6286-4e1e-b91e-007f43642c14",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.17"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|