"""FastAPI + Gradio app that summarizes documents and captions images.

Two Hugging Face pipelines are loaded at import time and exposed through a
tabbed Gradio interface mounted on a FastAPI application at "/".
"""

import io
import os

import docx
import fitz  # PyMuPDF
import gradio as gr
import openpyxl
import pptx
from fastapi import FastAPI
from fastapi.responses import RedirectResponse  # noqa: F401  (import kept; no longer used below)
from PIL import Image  # noqa: F401
from transformers import pipeline

# Initialize models
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
image_captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# FastAPI app
app = FastAPI()

# Only this many leading characters of a document are sent to the summarizer.
_MAX_SUMMARY_CHARS = 3000


# -------------------------
# Helper Functions
# -------------------------
def _upload_path(upload):
    """Return the filesystem path associated with *upload*, or None.

    A ``str``/``os.PathLike`` is treated as the path itself; any other object
    contributes its ``name`` attribute when that attribute is a string.
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    name = getattr(upload, "name", None)
    return name if isinstance(name, str) else None


def _read_upload_bytes(upload):
    """Return the raw content of *upload* as bytes.

    Accepts raw ``bytes``/``bytearray``, an object with ``read()`` (read from
    its current position), or a path-like value that is opened in binary mode.
    NOTE(review): which of these Gradio passes depends on the ``gr.File``
    ``type`` setting and the installed Gradio version - confirm for the
    deployed version.

    Raises:
        TypeError: if *upload* is none of the supported shapes.
        OSError: if a path is given and cannot be opened.
    """
    if isinstance(upload, (bytes, bytearray)):
        return bytes(upload)
    if hasattr(upload, "read"):
        return upload.read()
    path = _upload_path(upload)
    if path is None:
        raise TypeError(f"unsupported upload object: {type(upload).__name__}")
    with open(path, "rb") as handle:
        return handle.read()


def extract_text_from_pdf(upload) -> str:
    """Return the text of every page of a PDF, joined by newlines.

    On any failure a string starting with "❌" describing the error is
    returned instead of raising.
    """
    try:
        stream = io.BytesIO(_read_upload_bytes(upload))
        with fitz.open(stream=stream, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        return f"❌ PDF extraction error: {e}"


def extract_text_from_docx(upload) -> str:
    """Return the non-blank paragraphs of a DOCX file, joined by newlines.

    On any failure a string starting with "❌" describing the error is
    returned instead of raising.
    """
    try:
        stream = io.BytesIO(_read_upload_bytes(upload))
        doc = docx.Document(stream)
        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
    except Exception as e:
        return f"❌ DOCX extraction error: {e}"


def extract_text_from_pptx(upload) -> str:
    """Return the text of every shape exposing ``text`` in a PPTX file.

    Shapes are visited slide by slide and their texts joined by newlines.
    On any failure a string starting with "❌" describing the error is
    returned instead of raising.
    """
    try:
        stream = io.BytesIO(_read_upload_bytes(upload))
        prs = pptx.Presentation(stream)
        return "\n".join(
            shape.text
            for slide in prs.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    except Exception as e:
        return f"❌ PPTX extraction error: {e}"


def extract_text_from_xlsx(upload) -> str:
    """Return the cell values of every sheet of an XLSX file as text.

    Each row becomes one line with its cells separated by spaces. Only empty
    (``None``) cells are skipped, so values such as ``0`` and ``False`` are
    kept. On any failure a string starting with "❌" describing the error is
    returned instead of raising.
    """
    try:
        stream = io.BytesIO(_read_upload_bytes(upload))
        wb = openpyxl.load_workbook(stream)
        lines = []
        for sheet in wb.sheetnames:
            ws = wb[sheet]
            for row in ws.iter_rows(values_only=True):
                # `is not None` rather than truthiness: 0 and False are data.
                lines.append(" ".join(str(cell) for cell in row if cell is not None))
        return "\n".join(lines)
    except Exception as e:
        return f"❌ XLSX extraction error: {e}"


# Maps a lower-cased filename suffix to the extractor that handles it.
_EXTRACTORS = {
    ".pdf": extract_text_from_pdf,
    ".docx": extract_text_from_docx,
    ".pptx": extract_text_from_pptx,
    ".xlsx": extract_text_from_xlsx,
}


# -------------------------
# Core Functions
# -------------------------
def summarize_document(upload) -> str:
    """Extract the text of an uploaded document and summarize it.

    Args:
        upload: the uploaded document, either a file-like object with a
            ``name`` attribute or a path string. The file type is chosen
            from the (case-insensitive) filename suffix.

    Returns:
        The summary prefixed with "📄 Summary:", or a human-readable message
        when nothing was uploaded, the type is unsupported, extraction failed,
        no text was found, or the summarizer raised.
    """
    if not upload:
        return "⚠️ No file uploaded."

    filename = (_upload_path(upload) or "").lower()
    extractor = next(
        (fn for suffix, fn in _EXTRACTORS.items() if filename.endswith(suffix)),
        None,
    )
    if extractor is None:
        return "❌ Unsupported file type."

    # Rewind file-like uploads so extraction starts at the first byte;
    # path strings have nothing to rewind.
    if hasattr(upload, "seek"):
        upload.seek(0)

    text = extractor(upload)
    if text.startswith("❌"):
        # Extractors report failures in-band; pass their message through.
        return text
    if not text.strip():
        return "❗ No extractable text found."

    try:
        summary = summarizer(
            text[:_MAX_SUMMARY_CHARS],
            max_length=150,
            min_length=30,
            do_sample=False,
            # The character cut above does not bound the token count; let the
            # tokenizer truncate so dense text cannot overflow the model input.
            truncation=True,
        )
        return f"📄 Summary:\n{summary[0]['summary_text']}"
    except Exception as e:
        return f"⚠️ Summarization error: {e}"


def interpret_image(image) -> str:
    """Return a generated caption for *image*.

    Args:
        image: the uploaded image, or ``None`` when nothing was uploaded.
            The Gradio input below is configured with ``type="pil"``.

    Returns:
        The caption prefixed with "🖼️ Caption:", or a message when no image
        was supplied or the captioning pipeline raised.
    """
    if image is None:
        return "⚠️ No image uploaded."
    try:
        return f"🖼️ Caption:\n{image_captioner(image)[0]['generated_text']}"
    except Exception as e:
        return f"⚠️ Image captioning error: {e}"


# -------------------------
# Gradio Interface
# -------------------------
doc_ui = gr.Interface(
    fn=summarize_document,
    inputs=gr.File(label="Upload a Document (PDF, DOCX, PPTX, XLSX)"),
    outputs=gr.Textbox(label="Summary"),
    title="📄 Document Summarizer",
)

img_ui = gr.Interface(
    fn=interpret_image,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=gr.Textbox(label="Caption"),
    title="🖼️ Image Interpreter",
)

demo = gr.TabbedInterface(
    [doc_ui, img_ui],
    ["Document Summarization", "Image Captioning"],
)

# The UI is served directly at "/". No separate root route is registered:
# a `GET /` handler added after this mount would never be reached, and
# redirecting "/" to "/" would loop on itself.
app = gr.mount_gradio_app(app, demo, path="/")