{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "68e2dccb-3f52-4ea3-bf1d-8732641daefa", "metadata": {}, "outputs": [], "source": [ "import os\n", "import hashlib\n", "from PIL import Image\n", "import cv2\n", "import pandas\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import os\n", "import shutil\n", "import random\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b5f7e8cb-1c1e-423a-b7c5-68a41c3eeec3", "metadata": {}, "outputs": [], "source": [ "#REMOVE DUPLICATE IMAGES\n", "def calculate_hash(image_path):\n", "\n", " #Calculate the hash of an image.\n", " with Image.open(image_path) as img:\n", " img = img.convert(\"RGB\") # Ensure the image is in RGB format\n", " img = img.resize((8, 8)) # Resize to reduce size and create hash\n", " hash_value = hashlib.md5(img.tobytes()).hexdigest() # Create hash\n", " return hash_value\n", "\n", "def find_and_remove_duplicates(folder_path):\n", "\n", " #Find and remove duplicate images in a given folder.\n", "\n", " #If cannot find path/ folder, Print that it does not exist\n", " if not os.path.exists(folder_path):\n", "\n", " print(f\"The folder '{folder_path}' may not exist.\")\n", " return\n", "\n", " print(f\"Scanning folder: {folder_path}\")\n", "\n", " hashes = {}\n", " duplicates = []\n", "\n", " for filename in os.listdir(folder_path):# for each file in the folder\n", "\n", " if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):# if file is an image\n", "\n", " file_path = os.path.join(folder_path, filename) #generate a path to the specific image\n", "\n", " img_hash = calculate_hash(file_path)\n", "\n", " if img_hash in hashes:\n", " duplicates.append(file_path) # Found a duplicate\n", " print(f\"Duplicate found: {file_path} (duplicate of {hashes[img_hash]})\")\n", " else:\n", " hashes[img_hash] = file_path\n", "\n", " # Remove duplicates\n", " for duplicate in duplicates:\n", "\n", " os.remove(duplicate)\n", " print(f\"Removed duplicate: {duplicate}\")\n", "\n", " if not duplicates:\n", " print(\"No duplicates found.\")\n", "\n", "if __name__ == '__main__':\n", " folder = input(\"Enter the path to the folder containing photos: \")\n", " find_and_remove_duplicates(folder)\n", " \n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "73265e47-6308-4802-be5c-8eb953148d63", "metadata": {}, "outputs": [], "source": [ "#convert all images to jpg format\n", "def convert_images(folder):\n", " # Loop through the image folder directory\n", " for filename in os.listdir(folder):\n", " # Check if the file is not in JPG format\n", " if not filename.lower().endswith('.jpg') and filename.lower().endswith(('.png', '.gif', '.bmp', '.jpeg')):\n", " input_path = os.path.join(folder, filename)\n", " output_path = os.path.join(folder, f\"{os.path.splitext(filename)[0]}.jpg\") #jpg converted path\n", "\n", " try:\n", " # Open the image file\n", " with Image.open(input_path) as img:\n", " # Convert the image to RGB\n", " rgb_img = img.convert('RGB')\n", " # Save image as JPG\n", " rgb_img.save(output_path, 'JPEG')\n", " print(f\"Converted {filename} to {output_path}\")\n", " # Remove the old image file\n", " os.remove(input_path)\n", " print(f\"Removed old file: {input_path}\")\n", " except Exception as e:\n", " print(f\"Error processing {filename}: {e}\")\n", "\n", " print(\"Image conversion to .jpg completed.\") # Print once after processing all images\n", "\n", "if __name__ == '__main__':\n", " input_folder = input(\"Enter the path to the input folder containing images: \")\n", " convert_images(input_folder)\n" ] }, { "cell_type": "code", "execution_count": 17, "id": "9d4cea00-fc10-4ca4-a139-0dcd259b2767", "metadata": {}, "outputs": [], "source": [ "# check for corruption\n", "def is_corrupt(image_path):\n", " try:\n", " img = Image.open(image_path)\n", " img.verify() # Verify the image file\n", " return False # Image is not corrupted\n", " except (IOError, SyntaxError) as e:\n", " return True # Image is corrupted\n", "\n", "def read_files_in_folder(folder_path):\n", " count=0\n", " for filename in os.listdir(folder_path):\n", " file_path = os.path.join(folder_path, filename)\n", " if is_corrupt(file_path):\n", " count+=1\n", " print(\"Image is corrupted:\", file_path)\n", " return count\n", "if __name__ == '__main__':\n", " input_folder = input(\"Enter the path to the input folder containing images: \")\n", " is_corrupt(input_folder)" ] }, { "cell_type": "code", "execution_count": null, "id": "1ce74fa7", "metadata": {}, "outputs": [], "source": [ "# CREATE TEST DATA\n", "source_directory = input(\"Enter source directory: \")\n", "destination_directory = input(\"Enter destinaton directory: \")\n", "\n", "#get the total number of files in the directory\n", "count = 0\n", "for file in os.listdir(source_directory):\n", " all_files = file\n", " count += 1\n", "\n", "#get the list of files\n", "all_files = os.listdir(source_directory)\n", "\n", "#get percentage of files to move and sample\n", "twenty_percent = count//5\n", "\n", "files_to_move = random.sample(all_files, twenty_percent)\n", "\n", "\n", "for each_file in files_to_move:\n", " source_file = os.path.join(source_directory, each_file)\n", " destination_file = os.path.join(destination_directory, each_file)\n", " \n", " # move the file\n", " shutil.move(source_file, destination_file) " ] }, { "cell_type": "code", "execution_count": null, "id": "7668aa65-2fb1-4770-9e6a-50e378f7150e", "metadata": {}, "outputs": [], "source": [ "# assess the contrast quality of each image (overall distribution of pixel intensities in the image.)\n", "def check_histogram_quality(gray):\n", " hist = cv2.calcHist([gray], [0], None, [256], [0, 256])\n", " hist_sum = hist.sum()\n", " hist_normalized = hist / hist_sum\n", " hist_std = hist_normalized.std()\n", " return hist_std\n", "\n", "# checks the sharpness level of each image by applying Laplacian algorithm\n", "def check_sharpness(gray):\n", " return cv2.Laplacian(gray, cv2.CV_64F).var()\n", "\n", "# checks the mean variance of each image\n", "def check_mean_variance(gray):\n", " mean_intensity = np.mean(gray)\n", " variance_intensity = np.var(gray)\n", " return mean_intensity, variance_intensity\n", "\n", "# Returns result based on the quality of each image\n", "def check_image_quality(folder):\n", " results = [] # Collect results for all images\n", " for filename in os.listdir(folder):\n", " if filename.lower().endswith('.jpg'):\n", " image_path = os.path.join(folder, filename)\n", " print(f\"Processing: {filename}\") \n", " image = cv2.imread(image_path)\n", " if image is None:\n", " results.append(f\"{filename}: Error: Image not found.\")\n", " continue # Skip to the next image\n", "\n", " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", "\n", " # Quality assessments\n", " hist_std = check_histogram_quality(gray)\n", " sharpness = check_sharpness(gray)\n", " mean_intensity, variance_intensity = check_mean_variance(gray)\n", "\n", " quality_issues = []\n", "\n", " print(f\"hist_std for {image_path}: {hist_std}\")\n", "\n", " #Histogram quality check\n", " if hist_std <= 0.1:\n", " quality_issues.append(\"Histogram variance is low; consider improving contrast.\")\n", " \n", " # Sharpness check\n", " if sharpness < 100: # Adjust as necessary\n", " quality_issues.append(\"Image is blurry; consider sharpening.\")\n", "\n", " # Mean intensity check\n", " if mean_intensity <= 50:\n", " quality_issues.append(\"Image may be underexposed; consider brightening.\")\n", " elif mean_intensity >= 200:\n", " quality_issues.append(\"Image may be overexposed; consider reducing brightness.\")\n", " \n", " # Variance check\n", " if variance_intensity < 1000: # Adjust threshold as necessary\n", " quality_issues.append(\"Image has low intensity variance; check for flat areas.\")\n", "\n", " # Report results for this image\n", " if quality_issues:\n", " results.append(f\"{filename}: Image quality is not satisfactory. Issues found:\\n- \" + \"\\n- \".join(quality_issues))\n", " else:\n", " results.append(f\"{filename}: Image quality is good.\")\n", "\n", " return \"\\n\".join(results) # Return results for all images\n", "\n", "\n", "if __name__ == \"__main__\":\n", " input_folder = input(\"Enter the path to the input folder containing images: \")\n", " result = check_image_quality(input_folder)\n", " print(result)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" } }, "nbformat": 4, "nbformat_minor": 5 }