In [None]:
import os
import hashlib
from PIL import Image
import cv2
import pandas
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil
import random



In [None]:
#REMOVE DUPLICATE IMAGES
def calculate_hash(image_path):
    """Return an MD5 hex digest computed from an 8x8 RGB thumbnail of an image.

    The file at ``image_path`` is opened with PIL, converted to RGB and
    resized to 8x8 pixels; the digest is taken over the thumbnail's raw bytes.

    Args:
        image_path: Path of the image file to hash.

    Returns:
        The hexadecimal MD5 digest as a string.
    """
    with Image.open(image_path) as source_image:
        # Normalise the colour mode first, then shrink so the digest input is tiny.
        thumbnail = source_image.convert("RGB").resize((8, 8))
        return hashlib.md5(thumbnail.tobytes()).hexdigest()

def find_and_remove_duplicates(folder_path):
    """Delete images in ``folder_path`` whose hash matches an earlier image.

    Every file with an image extension is hashed with ``calculate_hash``. The
    first file seen for each hash is kept; later files with the same hash are
    reported and then deleted. Nothing is deleted until the whole folder has
    been scanned. The scan is not recursive.

    Args:
        folder_path: Directory to scan.

    Returns:
        None. Progress and results are printed.
    """
    # isdir (rather than exists) also rejects a path that points at a regular
    # file, which os.listdir() below would otherwise fail on.
    if not os.path.isdir(folder_path):
        print(f"The folder '{folder_path}' may not exist.")
        return

    print(f"Scanning folder: {folder_path}")

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    hashes = {}      # hash -> path of the first file seen with that hash
    duplicates = []  # paths scheduled for deletion

    # Sorted so that the copy that is kept does not depend on the arbitrary
    # order in which os.listdir() returns entries.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(image_extensions):
            continue

        file_path = os.path.join(folder_path, filename)

        try:
            img_hash = calculate_hash(file_path)
        except (OSError, ValueError) as e:
            # One unreadable file must not abort the scan; leave it in place.
            print(f"Skipping unreadable file {file_path}: {e}")
            continue

        if img_hash in hashes:
            duplicates.append(file_path)
            print(f"Duplicate found: {file_path} (duplicate of {hashes[img_hash]})")
        else:
            hashes[img_hash] = file_path

    # Remove duplicates only after the scan has finished.
    for duplicate in duplicates:
        os.remove(duplicate)
        print(f"Removed duplicate: {duplicate}")

    if not duplicates:
        print("No duplicates found.")

if __name__ == "__main__":
    # Prompt for the photo folder, then deduplicate its contents in place.
    folder = input("Enter the path to the folder containing photos: ")
    find_and_remove_duplicates(folder)
 



In [None]:
#convert all images to jpg format
def convert_images(folder):
    """Re-save every .png/.gif/.bmp/.jpeg file in ``folder`` as a ``.jpg`` file.

    Each matching file is opened with PIL, converted to RGB and saved as JPEG
    next to the original; the original is deleted once the save has succeeded.
    Files already ending in ``.jpg`` are left untouched. A failure on one file
    is printed and the loop continues with the next file.

    Args:
        folder: Directory whose files are converted (not recursive).

    Returns:
        None. Progress and errors are printed.
    """
    # Same guard and message as find_and_remove_duplicates, so a mistyped
    # path is reported instead of raising from os.listdir().
    if not os.path.isdir(folder):
        print(f"The folder '{folder}' may not exist.")
        return

    # '.jpg' is deliberately absent: those files are already in the target format.
    convertible_extensions = ('.png', '.gif', '.bmp', '.jpeg')

    for filename in os.listdir(folder):
        if not filename.lower().endswith(convertible_extensions):
            continue

        input_path = os.path.join(folder, filename)
        stem = os.path.splitext(filename)[0]
        output_path = os.path.join(folder, f"{stem}.jpg")

        # Never overwrite an existing file: "photo.png" must not clobber an
        # unrelated "photo.jpg". Fall back to "photo_1.jpg", "photo_2.jpg", ...
        suffix = 1
        while os.path.exists(output_path):
            output_path = os.path.join(folder, f"{stem}_{suffix}.jpg")
            suffix += 1

        try:
            with Image.open(input_path) as img:
                rgb_img = img.convert('RGB')
                rgb_img.save(output_path, 'JPEG')
            print(f"Converted {filename} to {output_path}")
            # Delete the source only after its handle is closed and the save
            # succeeded.
            os.remove(input_path)
            print(f"Removed old file: {input_path}")
        except Exception as e:
            print(f"Error processing {filename}: {e}")

    print("Image conversion to .jpg completed.")  # Printed once, after all files

if __name__ == "__main__":
    # Prompt for the image folder, then convert its non-JPG images.
    input_folder = input("Enter the path to the input folder containing images: ")
    convert_images(input_folder)


In [17]:
# check for corruption
def is_corrupt(image_path):
    """Report whether PIL fails to open or verify the file at ``image_path``.

    Args:
        image_path: Path of the file to check.

    Returns:
        True if opening or ``verify()`` raises ``IOError`` or ``SyntaxError``,
        False otherwise.
    """
    try:
        # The context manager closes the file handle; the earlier bare
        # Image.open() call left one handle open for every file checked.
        with Image.open(image_path) as img:
            img.verify()  # Verify the image file
    except (IOError, SyntaxError):
        return True  # Image is corrupted
    return False  # Image is not corrupted

def read_files_in_folder(folder_path):
    """Count the entries of ``folder_path`` that ``is_corrupt`` flags.

    The path of every flagged entry is printed as it is found.

    Args:
        folder_path: Directory whose entries are checked.

    Returns:
        The number of entries reported as corrupted.
    """
    corrupt_total = 0
    entry_paths = (os.path.join(folder_path, entry) for entry in os.listdir(folder_path))
    for candidate in entry_paths:
        if not is_corrupt(candidate):
            continue
        corrupt_total += 1
        print("Image is corrupted:", candidate)
    return corrupt_total
if __name__ == '__main__':
    input_folder = input("Enter the path to the input folder containing images: ")
    # Scan the folder's contents. Passing the folder path itself to
    # is_corrupt() would test the directory rather than the images inside it
    # and would throw the result away.
    corrupt_count = read_files_in_folder(input_folder)
    print(f"Corrupted images found: {corrupt_count}")

In [None]:
# CREATE TEST DATA
# Ask where to sample from and where the held-out test files should go.
source_directory = input("Enter source directory: ")
destination_directory = input("Enter destination directory: ")

# Only regular files are candidates: os.listdir() also returns
# sub-directories, which must be neither counted nor moved.
all_files = [
    entry for entry in os.listdir(source_directory)
    if os.path.isfile(os.path.join(source_directory, entry))
]

# Total number of candidate files.
count = len(all_files)

# Hold out 20% of the files (rounded down) as test data.
twenty_percent = count // 5

# NOTE(review): the sample is unseeded, so a different subset is chosen on
# every run, and re-running this cell moves a further 20% of what is left.
files_to_move = random.sample(all_files, twenty_percent)

# Create the destination up front; moving into a missing directory fails.
os.makedirs(destination_directory, exist_ok=True)

for each_file in files_to_move:
    source_file = os.path.join(source_directory, each_file)
    destination_file = os.path.join(destination_directory, each_file)

    # move the file
    shutil.move(source_file, destination_file)

In [None]:
# assess the contrast quality of each image (overall distribution of pixel intensities in the image.)
def check_histogram_quality(gray):
    """Return the standard deviation of the normalised intensity histogram.

    A 256-bin histogram over the range [0, 256) is computed for ``gray`` with
    ``cv2.calcHist``, divided by its sum, and the standard deviation of the
    resulting bin values is returned.

    Args:
        gray: Image passed to ``cv2.calcHist`` as channel 0.

    Returns:
        The standard deviation of the normalised histogram bins.
    """
    bin_counts = cv2.calcHist([gray], [0], None, [256], [0, 256])
    bin_fractions = bin_counts / bin_counts.sum()
    return bin_fractions.std()

# checks the sharpness level of each image by applying Laplacian algorithm
def check_sharpness(gray):
    """Return the variance of the Laplacian of ``gray``.

    The Laplacian is computed by ``cv2.Laplacian`` with ``cv2.CV_64F`` as the
    output depth.

    Args:
        gray: Image passed to ``cv2.Laplacian``.

    Returns:
        The variance of the Laplacian response.
    """
    laplacian_response = cv2.Laplacian(gray, cv2.CV_64F)
    return laplacian_response.var()

# checks the mean variance of each image
def check_mean_variance(gray):
    """Return the mean and the variance of the values in ``gray``.

    Args:
        gray: Array-like of intensities accepted by ``np.mean`` and ``np.var``.

    Returns:
        A ``(mean, variance)`` tuple.
    """
    return np.mean(gray), np.var(gray)

# Returns result based on the quality of each image
def check_image_quality(folder, hist_std_max=0.01, sharpness_min=100,
                        dark_mean_max=50, bright_mean_min=200, variance_min=1000):
    """Assess every ``.jpg`` file in ``folder`` and return a text report.

    Each image is read with OpenCV, converted to grayscale and scored with
    ``check_histogram_quality``, ``check_sharpness`` and
    ``check_mean_variance``. A score beyond its threshold adds one issue line
    for that image.

    Args:
        folder: Directory containing the images (not recursive).
        hist_std_max: Histogram-std value at or above which the image is
            reported as low contrast.
        sharpness_min: Laplacian variance below which the image is reported
            as blurry.
        dark_mean_max: Mean intensity at or below which the image is reported
            as underexposed.
        bright_mean_min: Mean intensity at or above which the image is
            reported as overexposed.
        variance_min: Intensity variance below which the image is reported as
            flat.

    Returns:
        One entry per processed image, joined with newlines; an empty string
        if the folder contains no ``.jpg`` files.
    """
    results = []  # Collect results for all images
    for filename in os.listdir(folder):
        if not filename.lower().endswith('.jpg'):
            continue

        image_path = os.path.join(folder, filename)
        print(f"Processing: {filename}")
        image = cv2.imread(image_path)
        if image is None:
            results.append(f"{filename}: Error: Image not found.")
            continue  # Skip to the next image

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Quality assessments
        hist_std = check_histogram_quality(gray)
        sharpness = check_sharpness(gray)
        mean_intensity, variance_intensity = check_mean_variance(gray)

        quality_issues = []

        print(f"hist_std for {image_path}: {hist_std}")

        # Histogram check. The std of a normalised 256-bin histogram runs
        # from 0 (perfectly flat) to about 0.0624 (all pixels in one bin), so
        # a "<= 0.1" test fires for every image. Low contrast corresponds to
        # a concentrated histogram, i.e. a HIGH std; the default of 0.01 is
        # roughly what ~33 equally filled gray levels produce. Adjust as
        # necessary.
        if hist_std >= hist_std_max:
            quality_issues.append(
                "Histogram is concentrated in a narrow intensity range; consider improving contrast."
            )

        # Sharpness check
        if sharpness < sharpness_min:
            quality_issues.append("Image is blurry; consider sharpening.")

        # Mean intensity check
        if mean_intensity <= dark_mean_max:
            quality_issues.append("Image may be underexposed; consider brightening.")
        elif mean_intensity >= bright_mean_min:
            quality_issues.append("Image may be overexposed; consider reducing brightness.")

        # Variance check
        if variance_intensity < variance_min:
            quality_issues.append("Image has low intensity variance; check for flat areas.")

        # Report results for this image
        if quality_issues:
            results.append(
                f"{filename}: Image quality is not satisfactory. Issues found:\n- "
                + "\n- ".join(quality_issues)
            )
        else:
            results.append(f"{filename}: Image quality is good.")

    return "\n".join(results)  # Return results for all images


if __name__ == "__main__":
    # Prompt for the image folder, build the quality report, then show it.
    input_folder = input("Enter the path to the input folder containing images: ")
    result = check_image_quality(input_folder)
    print(result)