from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import os
import gradio as gr
from datasets import load_dataset, Dataset, concatenate_datasets
import pandas as pd
from PIL import Image
import pytesseract
import cv2
import numpy as np
import tensorflow as tf
from transformers import LayoutLMv2Processor, LayoutLMv2ForSequenceClassification
import torch
from tqdm import tqdm
import logging
import re

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CardPreprocessor:
    def __init__(self):
        # Initialize OCR and models
        self.processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
        self.ocr_threshold = 0.5

    def extract_text_regions(self, image):
        """Extract text regions from the image using OCR"""
        try:
            # Convert PIL Image to cv2 format
            img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

            # Preprocess image for better OCR
            gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

            # Perform OCR
            text = pytesseract.image_to_data(thresh, output_type=pytesseract.Output.DICT)

            # Extract relevant information
            extracted_info = {
                'player_name': None,
                'team': None,
                'year': None,
                'card_number': None,
                'brand': None,
                'stats': []
            }

            # Process OCR results
            for i, word in enumerate(text['text']):
                if word.strip():
                    # 'conf' may be a string or a float depending on the pytesseract version
                    conf = int(float(text['conf'][i]))
                    if conf > 50:  # Filter low-confidence detections
                        # Try to identify year
                        year_match = re.search(r'19[0-9]{2}|20[0-2][0-9]', word)
                        if year_match:
                            extracted_info['year'] = year_match.group()

                        # Try to identify card number
                        card_num_match = re.search(r'#\d+|\d+/\d+', word)
                        if card_num_match:
                            extracted_info['card_number'] = card_num_match.group()

                        # Look for common card brands
                        brands = ['topps', 'upper deck', 'panini', 'fleer', 'bowman']
                        if word.lower() in brands:
                            extracted_info['brand'] = word.lower()

                        # Look for statistics (numbers with common sports stats patterns)
                        stats_match = re.search(r'\d+\s*(?:HR|RBI|AVG|YDS|TD)', word)
                        if stats_match:
                            extracted_info['stats'].append(stats_match.group())

            return extracted_info
        except Exception as e:
            logger.error(f"Error in OCR processing: {str(e)}")
            return None

    def analyze_card_condition(self, image):
        """Analyze the physical condition of the card"""
        try:
            # Convert PIL Image to cv2 format
            img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

            # Edge detection for corner and edge analysis
            edges = cv2.Canny(img_cv, 100, 200)

            # Analyze corners
            corner_regions = {
                'top_left': edges[0:50, 0:50],
                'top_right': edges[0:50, -50:],
                'bottom_left': edges[-50:, 0:50],
                'bottom_right': edges[-50:, -50:]
            }
            corner_scores = {k: np.mean(v) for k, v in corner_regions.items()}

            # Analyze centering
            height, width = img_cv.shape[:2]
            center_x = width // 2
            center_y = height // 2

            # Calculate centering score
            centering_score = self.calculate_centering(img_cv, center_x, center_y)

            condition_info = {
                'corner_scores': corner_scores,
                'centering_score': centering_score,
                'overall_condition': self.calculate_overall_condition(corner_scores, centering_score)
            }

            return condition_info
        except Exception as e:
            logger.error(f"Error in condition analysis: {str(e)}")
            return None

    def calculate_centering(self, image, center_x, center_y):
        """Calculate the centering score of the card"""
        try:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            edges = cv2.Canny(gray, 50, 150)

            # Find contours
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # Find the largest contour (assumed to be the card)
                main_contour = max(contours, key=cv2.contourArea)
                x, y, w, h = cv2.boundingRect(main_contour)

                # Calculate centering scores
                x_score = abs(0.5 - (x + w / 2) / image.shape[1])
                y_score = abs(0.5 - (y + h / 2) / image.shape[0])

                return 1 - (x_score + y_score) / 2
            return None
        except Exception as e:
            logger.error(f"Error in centering calculation: {str(e)}")
            return None

    def calculate_overall_condition(self, corner_scores, centering_score):
        """Calculate overall condition score"""
        # Check centering_score against None so a legitimate 0.0 score is not discarded
        if corner_scores and centering_score is not None:
            corner_avg = sum(corner_scores.values()) / len(corner_scores)
            return (corner_avg + centering_score) / 2
        return None

    def detect_orientation(self, image):
        """Detect if the card is portrait or landscape"""
        width, height = image.size
        return 'portrait' if height > width else 'landscape'


class DatasetManager:
    def __init__(self, local_images_dir="downloaded_cards"):
        self.local_images_dir = local_images_dir
        self.drive = None
        self.dataset_name = "GotThatData/sports-cards"
        self.preprocessor = CardPreprocessor()

        # Create local directory if it doesn't exist
        os.makedirs(local_images_dir, exist_ok=True)

    def authenticate_drive(self):
        """Authenticate with Google Drive"""
        try:
            gauth = GoogleAuth()
            gauth.LocalWebserverAuth()
            self.drive = GoogleDrive(gauth)
            return True, "Successfully authenticated with Google Drive"
        except Exception as e:
            return False, f"Authentication failed: {str(e)}"

    def process_image(self, image_path):
        """Process a single image and extract information"""
        try:
            with Image.open(image_path) as img:
                # Extract text information
                text_info = self.preprocessor.extract_text_regions(img)

                # Analyze card condition
                condition_info = self.preprocessor.analyze_card_condition(img)

                # Get orientation
                orientation = self.preprocessor.detect_orientation(img)

                return {
                    'text_info': text_info,
                    'condition_info': condition_info,
                    'orientation': orientation
                }
        except Exception as e:
            logger.error(f"Error processing image {image_path}: {str(e)}")
            return None

    def generate_filename(self, info):
        """Generate filename based on extracted information"""
        # The OCR keys are always present but may be None, so use `or` fallbacks
        # rather than dict.get() defaults
        year = info['text_info'].get('year') or 'unknown_year'
        brand = info['text_info'].get('brand') or 'unknown_brand'
        number = (info['text_info'].get('card_number') or '').replace('#', '').replace('/', '_')
        if not number:
            number = 'unknown_number'
        return f"sports_card_{year}_{brand}_{number}"

    def download_and_rename_files(self, drive_folder_id):
        """Download files from Google Drive and process them"""
        if not self.drive:
            return False, "Google Drive not authenticated", []

        try:
            # List files in the folder
            query = f"'{drive_folder_id}' in parents and trashed=false"
            file_list = self.drive.ListFile({'q': query}).GetList()

            if not file_list:
                # Fall back to treating the ID as a single file rather than a folder
                file = self.drive.CreateFile({'id': drive_folder_id})
                if file:
                    file_list = [file]
                else:
                    return False, "No files found with the specified ID", []

            processed_files = []
            for i, file in enumerate(tqdm(file_list, desc="Processing files")):
                if file['mimeType'].startswith('image/'):
                    temp_path = os.path.join(self.local_images_dir, f"temp_{i}.jpg")

                    # Download file
                    file.GetContentFile(temp_path)

                    # Process image
                    info = self.process_image(temp_path)
                    if info:
                        # Generate filename based on extracted info
                        base_filename = self.generate_filename(info)
                        new_filename = f"{base_filename}.jpg"
                        final_path = os.path.join(self.local_images_dir, new_filename)

                        # Rename file
                        os.rename(temp_path, final_path)

                        processed_files.append({
                            'file_path': final_path,
                            'original_name': file['title'],
                            'new_name': new_filename,
                            'image': final_path,
                            'extracted_info': info['text_info'],
                            'condition': info['condition_info'],
                            'orientation': info['orientation']
                        })
                    else:
                        os.remove(temp_path)

            return True, f"Successfully processed {len(processed_files)} images", processed_files
        except Exception as e:
            return False, f"Error processing files: {str(e)}", []

    def update_huggingface_dataset(self, processed_files):
        """Update the sports-cards dataset with processed images"""
        try:
            # Create a DataFrame with the file information
            df = pd.DataFrame(processed_files)

            # Create a Hugging Face Dataset from the new files
            new_dataset = Dataset.from_pandas(df)

            try:
                # Try to load existing dataset
                existing_dataset = load_dataset(self.dataset_name)

                # Concatenate with existing dataset if it exists
                if 'train' in existing_dataset:
                    new_dataset = concatenate_datasets([existing_dataset['train'], new_dataset])
            except Exception:
                logger.info("Creating new dataset")

            # Push to Hugging Face Hub
            new_dataset.push_to_hub(self.dataset_name, split="train")

            return True, f"Successfully updated dataset '{self.dataset_name}' with {len(processed_files)} processed images"
        except Exception as e:
            return False, f"Error updating Hugging Face dataset: {str(e)}"


def process_pipeline(folder_id):
    """Main pipeline to process images and update dataset"""
    manager = DatasetManager()

    # Step 1: Authenticate
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message

    # Step 2: Download and process files
    success, message, processed_files = manager.download_and_rename_files(folder_id)
    if not success:
        return message

    # Step 3: Update Hugging Face dataset
    success, hf_message = manager.update_huggingface_dataset(processed_files)

    # Create detailed report
    report = f"{message}\n{hf_message}\n\nDetailed Processing Report:\n"
    for file in processed_files:
        report += f"\nFile: {file['new_name']}\n"
        report += f"Extracted Info: {file['extracted_info']}\n"
        # overall_condition may be None if condition analysis or centering failed
        overall = (file['condition'] or {}).get('overall_condition')
        report += f"Condition Score: {overall:.2f}\n" if overall is not None else "Condition Score: N/A\n"
        report += f"Orientation: {file['orientation']}\n"
        report += "-" * 50

    return report


# Gradio interface
demo = gr.Interface(
    fn=process_pipeline,
    inputs=[
        gr.Textbox(
            label="Google Drive File/Folder ID",
            placeholder="Enter the ID from your Google Drive URL",
            value="151VOxPO91mg0C3ORiioGUd4hogzP1ujm"
        )
    ],
    outputs=gr.Textbox(label="Processing Report"),
    title="AI-Powered Sports Cards Processor",
    description="Upload card images to automatically extract information, analyze condition, and add to dataset"
)

if __name__ == "__main__":
    demo.launch()