{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68e2dccb-3f52-4ea3-bf1d-8732641daefa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import hashlib\n",
    "from PIL import Image\n",
    "import cv2\n",
    "import pandas\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import shutil\n",
    "import random\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5f7e8cb-1c1e-423a-b7c5-68a41c3eeec3",
   "metadata": {},
   "outputs": [],
   "source": [
    "#REMOVE DUPLICATE IMAGES\n",
    "def calculate_hash(image_path):\n",
    "    \"\"\"Return an MD5 hex digest of the image's 8x8 RGB thumbnail.\n",
    "\n",
    "    The file is opened with Pillow, converted to RGB, resized to 8x8 and the\n",
    "    raw pixel bytes of that thumbnail are hashed, so two files get the same\n",
    "    digest whenever their thumbnails are byte-identical.\n",
    "    \"\"\"\n",
    "    with Image.open(image_path) as source_image:\n",
    "        thumbnail = source_image.convert(\"RGB\").resize((8, 8))\n",
    "        return hashlib.md5(thumbnail.tobytes()).hexdigest()\n",
    "\n",
    "def find_and_remove_duplicates(folder_path):\n",
    "    \"\"\"Delete images in ``folder_path`` whose ``calculate_hash`` value repeats.\n",
    "\n",
    "    Files are visited in sorted name order, so within each group of files\n",
    "    sharing a hash the alphabetically first one is kept and the rest are\n",
    "    deleted. Only the top level of the folder is scanned.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    folder_path : str\n",
    "        Folder to scan. If it is not an existing directory a message is\n",
    "        printed and nothing else happens.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        Progress and results are reported with ``print``.\n",
    "    \"\"\"\n",
    "    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')\n",
    "\n",
    "    # A plain file path would make os.listdir raise, so require a directory.\n",
    "    if not os.path.isdir(folder_path):\n",
    "        print(f\"The folder '{folder_path}' may not exist.\")\n",
    "        return\n",
    "\n",
    "    print(f\"Scanning folder: {folder_path}\")\n",
    "\n",
    "    hashes = {}  # hash -> path of the first file seen with that hash (kept)\n",
    "    duplicates = []\n",
    "\n",
    "    # os.listdir order is arbitrary; sorting makes the kept copy reproducible.\n",
    "    for filename in sorted(os.listdir(folder_path)):\n",
    "        if not filename.lower().endswith(image_extensions):\n",
    "            continue\n",
    "\n",
    "        file_path = os.path.join(folder_path, filename)\n",
    "\n",
    "        try:\n",
    "            img_hash = calculate_hash(file_path)\n",
    "        except Exception as e:\n",
    "            # One unreadable file should not abort the whole scan.\n",
    "            print(f\"Error processing {filename}: {e}\")\n",
    "            continue\n",
    "\n",
    "        if img_hash in hashes:\n",
    "            duplicates.append(file_path)\n",
    "            print(f\"Duplicate found: {file_path} (duplicate of {hashes[img_hash]})\")\n",
    "        else:\n",
    "            hashes[img_hash] = file_path\n",
    "\n",
    "    # Deletion happens only after the full scan has finished.\n",
    "    for duplicate in duplicates:\n",
    "        os.remove(duplicate)\n",
    "        print(f\"Removed duplicate: {duplicate}\")\n",
    "\n",
    "    if not duplicates:\n",
    "        print(\"No duplicates found.\")\n",
    "\n",
    "# Ask for the photo folder, then remove its duplicate images.\n",
    "if __name__ == \"__main__\":\n",
    "    folder = input(\"Enter the path to the folder containing photos: \")\n",
    "    find_and_remove_duplicates(folder_path=folder)\n",
    "    \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73265e47-6308-4802-be5c-8eb953148d63",
   "metadata": {},
   "outputs": [],
   "source": [
    "#convert all images to jpg format\n",
    "def convert_images(folder):\n",
    "    # Loop through the image folder directory\n",
    "    for filename in os.listdir(folder):\n",
    "        # Check if the file is not in JPG format\n",
    "        if not filename.lower().endswith('.jpg') and filename.lower().endswith(('.png', '.gif', '.bmp', '.jpeg')):\n",
    "            input_path = os.path.join(folder, filename)\n",
    "            output_path = os.path.join(folder, f\"{os.path.splitext(filename)[0]}.jpg\") #jpg converted path\n",
    "\n",
    "            try:\n",
    "                # Open the image file\n",
    "                with Image.open(input_path) as img:\n",
    "                    # Convert the image to RGB\n",
    "                    rgb_img = img.convert('RGB')\n",
    "                    # Save image as JPG\n",
    "                    rgb_img.save(output_path, 'JPEG')\n",
    "                    print(f\"Converted {filename} to {output_path}\")\n",
    "                    # Remove the old image file\n",
    "                    os.remove(input_path)\n",
    "                    print(f\"Removed old file: {input_path}\")\n",
    "            except Exception as e:\n",
    "                print(f\"Error processing {filename}: {e}\")\n",
    "\n",
    "    print(\"Image conversion to .jpg completed.\")  # Print once after processing all images\n",
    "\n",
    "# Ask for the image folder, then convert its non-JPG images in place.\n",
    "if __name__ == \"__main__\":\n",
    "    input_folder = input(\"Enter the path to the input folder containing images: \")\n",
    "    convert_images(folder=input_folder)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9d4cea00-fc10-4ca4-a139-0dcd259b2767",
   "metadata": {},
   "outputs": [],
   "source": [
    "# check for corruption\n",
    "def is_corrupt(image_path):\n",
    "    \"\"\"Return True when Pillow cannot open or verify the file at ``image_path``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    bool\n",
    "        False if ``Image.open`` and ``verify()`` both succeed, True if either\n",
    "        raises ``IOError`` or ``SyntaxError``.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # The context manager releases the file handle even if verify() raises.\n",
    "        with Image.open(image_path) as img:\n",
    "            img.verify()\n",
    "        return False  # Image is not corrupted\n",
    "    except (IOError, SyntaxError):\n",
    "        return True  # Image is corrupted\n",
    "\n",
    "def read_files_in_folder(folder_path):\n",
    "    count=0\n",
    "    for filename in os.listdir(folder_path):\n",
    "         file_path = os.path.join(folder_path, filename)\n",
    "         if is_corrupt(file_path):\n",
    "            count+=1\n",
    "            print(\"Image is corrupted:\", file_path)\n",
    "    return count\n",
    "# Scan every file in the chosen folder and report how many failed verification.\n",
    "# (Calling is_corrupt on the folder path itself would only test the directory.)\n",
    "if __name__ == '__main__':\n",
    "    input_folder = input(\"Enter the path to the input folder containing images: \")\n",
    "    corrupt_count = read_files_in_folder(input_folder)\n",
    "    print(f\"Corrupted images found: {corrupt_count}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ce74fa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# CREATE TEST DATA\n",
    "# Move a random 20% of the files in the source directory into the destination\n",
    "# directory. Each run moves a further 20% of whatever is still in the source.\n",
    "source_directory = input(\"Enter source directory: \")\n",
    "destination_directory = input(\"Enter destinaton directory: \")\n",
    "\n",
    "# Only regular files are candidates; sub-directories stay where they are.\n",
    "all_files = [\n",
    "    name for name in os.listdir(source_directory)\n",
    "    if os.path.isfile(os.path.join(source_directory, name))\n",
    "]\n",
    "count = len(all_files)\n",
    "\n",
    "# 20% of the files, rounded down\n",
    "twenty_percent = count // 5\n",
    "\n",
    "files_to_move = random.sample(all_files, twenty_percent)\n",
    "\n",
    "# shutil.move fails if the target folder is missing, so create it first.\n",
    "os.makedirs(destination_directory, exist_ok=True)\n",
    "\n",
    "for each_file in files_to_move:\n",
    "    source_file = os.path.join(source_directory, each_file)\n",
    "    destination_file = os.path.join(destination_directory, each_file)\n",
    "\n",
    "    # move the file\n",
    "    shutil.move(source_file, destination_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7668aa65-2fb1-4770-9e6a-50e378f7150e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# assess the contrast quality of each image (overall distribution of pixel intensities in the image.)\n",
    "def check_histogram_quality(gray):\n",
    "    \"\"\"Return the standard deviation of the 256-bin histogram of ``gray``, normalised to sum 1.\"\"\"\n",
    "    bin_counts = cv2.calcHist([gray], [0], None, [256], [0, 256])\n",
    "    bin_fractions = bin_counts / bin_counts.sum()\n",
    "    return bin_fractions.std()\n",
    "\n",
    "# checks the sharpness level of each image by applying Laplacian algorithm\n",
    "def check_sharpness(gray):\n",
    "    \"\"\"Return the variance of the Laplacian of ``gray``, computed at CV_64F depth.\"\"\"\n",
    "    laplacian_response = cv2.Laplacian(gray, cv2.CV_64F)\n",
    "    return laplacian_response.var()\n",
    "\n",
    "# checks the mean variance of each image\n",
    "def check_mean_variance(gray):\n",
    "    mean_intensity = np.mean(gray)\n",
    "    variance_intensity = np.var(gray)\n",
    "    return mean_intensity, variance_intensity\n",
    "\n",
    "# Returns result based on the quality of each image\n",
    "def check_image_quality(folder):\n",
    "    results = []  # Collect results for all images\n",
    "    for filename in os.listdir(folder):\n",
    "        if filename.lower().endswith('.jpg'):\n",
    "            image_path = os.path.join(folder, filename)\n",
    "            print(f\"Processing: {filename}\") \n",
    "            image = cv2.imread(image_path)\n",
    "            if image is None:\n",
    "                results.append(f\"{filename}: Error: Image not found.\")\n",
    "                continue  # Skip to the next image\n",
    "\n",
    "            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
    "\n",
    "            # Quality assessments\n",
    "            hist_std = check_histogram_quality(gray)\n",
    "            sharpness = check_sharpness(gray)\n",
    "            mean_intensity, variance_intensity = check_mean_variance(gray)\n",
    "\n",
    "            quality_issues = []\n",
    "\n",
    "            print(f\"hist_std for {image_path}: {hist_std}\")\n",
    "\n",
    "            #Histogram quality check\n",
    "            if hist_std <= 0.1:\n",
    "                quality_issues.append(\"Histogram variance is low; consider improving contrast.\")\n",
    "            \n",
    "            # Sharpness check\n",
    "            if sharpness < 100:  # Adjust as necessary\n",
    "                quality_issues.append(\"Image is blurry; consider sharpening.\")\n",
    "\n",
    "            # Mean intensity check\n",
    "            if mean_intensity <= 50:\n",
    "                quality_issues.append(\"Image may be underexposed; consider brightening.\")\n",
    "            elif mean_intensity >= 200:\n",
    "                quality_issues.append(\"Image may be overexposed; consider reducing brightness.\")\n",
    "            \n",
    "            # Variance check\n",
    "            if variance_intensity < 1000:  # Adjust threshold as necessary\n",
    "                quality_issues.append(\"Image has low intensity variance; check for flat areas.\")\n",
    "\n",
    "            # Report results for this image\n",
    "            if quality_issues:\n",
    "                results.append(f\"{filename}: Image quality is not satisfactory. Issues found:\\n- \" + \"\\n- \".join(quality_issues))\n",
    "            else:\n",
    "                results.append(f\"{filename}: Image quality is good.\")\n",
    "\n",
    "    return \"\\n\".join(results)  # Return results for all images\n",
    "\n",
    "\n",
    "# Ask for the image folder, build the quality report and print it.\n",
    "if __name__ == '__main__':\n",
    "    input_folder = input(\"Enter the path to the input folder containing images: \")\n",
    "    result = check_image_quality(folder=input_folder)\n",
    "    print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}