!pip install transformers torch torchvision timm easyocr pytesseract gradio datasets huggingface_hub import gradio as gr import torch from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline from PIL import Image import requests # ✅ Load TrOCR model (Pretrained on Handwritten OCR) MODEL_NAME = "microsoft/trocr-base-handwritten" # ✅ Check if GPU is available device = "cuda" if torch.cuda.is_available() else "cpu" # ✅ Cache the model to prevent reloading on every request processor = TrOCRProcessor.from_pretrained(MODEL_NAME) model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME).to(device) # ✅ Function to extract text def extract_text(image): image = Image.open(image).convert("RGB") # Convert Image to Model Format pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device) # Generate Text from Model generated_ids = model.generate(pixel_values) extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] return extracted_text # ✅ Load NLP Pipeline for Structuring nlp_pipeline = pipeline("ner", model="dslim/bert-base-NER") # ✅ Function to Structure Extracted Text def structure_text(text): ner_results = nlp_pipeline(text) structured_output = [] for entity in ner_results: structured_output.append(f"{entity['word']} ({entity['entity']})") return " ".join(structured_output) # ✅ Function to process document (OCR + NLP) def process_document(image): extracted_text = extract_text(image) structured_text = structure_text(extracted_text) return extracted_text, structured_text # ✅ Launch Gradio App iface = gr.Interface( fn=process_document, inputs="image", outputs=["text", "text"], title="TransformoDocs - AI Document Processor", description="Upload a scanned document or handwritten note. The AI will extract and structure the text.", ) iface.launch(share=True) # ✅ Use 'share=True' for public link