# app.py
"""Gradio demo: summarize text, translate Arabic summaries, retrieve images.

Pipeline for one request:
1. Summarize the input text.
2. If the input language is Arabic, translate the summary to English.
3. Embed the (English) query text with CLIP and return the best-matching
   images from a local image directory.
"""
import logging
import os

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import pipeline, AutoModel, AutoProcessor

logger = logging.getLogger(__name__)

# File extensions (lower-case) treated as images when scanning the directory.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
# Number of images shown in the gallery for each query.
TOP_K = 3

# Initialize models (outside process function) so they are loaded once at
# start-up rather than on every request.
summarizer = pipeline("summarization", "csebuetnlp/mT5_multilingual_XLSum")
translator_ar2en = pipeline("translation_ar_to_en", "Helsinki-NLP/opus-mt-ar-en")
clip_model = AutoModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")


def _empty_embeddings():
    """Return a zero-row embedding matrix used when no image is available."""
    return torch.empty((0, clip_model.config.projection_dim))


# Image preprocessing
def precompute_embeddings(image_dir="images"):
    """Embed every image found in ``image_dir`` with CLIP.

    Args:
        image_dir: Directory scanned (non-recursively) for files whose name
            ends with one of ``IMAGE_EXTENSIONS`` (case-insensitive).

    Returns:
        A ``(paths, embeddings)`` tuple. ``paths`` is the sorted list of image
        paths that were embedded successfully and ``embeddings`` is a tensor
        with one L2-normalized row per entry of ``paths``, in the same order.
        When the directory is missing or holds no readable image, ``paths``
        is empty and ``embeddings`` has zero rows.
    """
    if not os.path.isdir(image_dir):
        logger.warning("Image directory %r not found; retrieval disabled.", image_dir)
        return [], _empty_embeddings()

    # Sorted so the index -> path mapping is stable across runs/platforms.
    candidates = sorted(
        os.path.join(image_dir, name)
        for name in os.listdir(image_dir)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )

    kept_paths = []
    embeddings = []
    for path in candidates:
        try:
            # Context manager closes the underlying file handle promptly.
            with Image.open(path) as image:
                inputs = clip_processor(
                    images=image.convert("RGB"), return_tensors="pt"
                )
        except OSError:
            # Covers truncated/corrupt files (UnidentifiedImageError is an
            # OSError); skip them instead of aborting start-up.
            logger.warning("Skipping unreadable image: %s", path)
            continue

        with torch.no_grad():
            features = clip_model.get_image_features(**inputs)
        # Normalize so that a dot product with a normalized text embedding
        # is a cosine similarity.
        embeddings.append(features / features.norm(dim=-1, keepdim=True))
        kept_paths.append(path)

    if not embeddings:
        logger.warning("No usable images found in %r; retrieval disabled.", image_dir)
        return [], _empty_embeddings()

    return kept_paths, torch.cat(embeddings)


image_paths, image_embeddings = precompute_embeddings()


def _retrieve_images(query_text, k=TOP_K):
    """Return the paths of the ``k`` images most similar to ``query_text``.

    Fewer than ``k`` paths are returned when fewer images are indexed; an
    empty list is returned when no image is indexed at all.
    """
    if not image_paths:
        return []

    text_inputs = clip_processor(
        text=query_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        text_emb = clip_model.get_text_features(**text_inputs)
    text_emb = text_emb / text_emb.norm(dim=-1, keepdim=True)

    # A single query produces a one-row similarity matrix; drop that leading
    # dimension so topk yields a flat list of image indices.
    similarities = (text_emb @ image_embeddings.T).squeeze(0)
    top_indices = similarities.topk(min(k, len(image_paths))).indices.tolist()
    return [image_paths[i] for i in top_indices]


def process(input_text, language):
    """Summarize ``input_text`` and retrieve images matching the summary.

    Args:
        input_text: Text to summarize.
        language: ``"Arabic"`` triggers Arabic-to-English translation of the
            summary before retrieval; any other value uses the summary as-is.

    Returns:
        A ``(summary, translation, image_paths)`` tuple. ``translation`` is
        the empty string unless ``language`` is ``"Arabic"``. All three are
        empty when ``input_text`` is empty or whitespace-only.
    """
    if not input_text or not input_text.strip():
        return "", "", []

    # Text summarization
    summary = summarizer(input_text, max_length=150, min_length=30)[0]['summary_text']

    # Translation if Arabic
    if language == "Arabic":
        translated = translator_ar2en(summary)[0]['translation_text']
        query_text = translated
    else:
        translated = ""
        query_text = summary

    # Text-image retrieval
    results = _retrieve_images(query_text)

    return summary, translated, results


# Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌍 Multi-Task AI: Summarization & Image Retrieval")

    with gr.Row():
        lang = gr.Dropdown(["English", "Arabic"], label="Input Language")
        text_input = gr.Textbox(label="Input Text", lines=5)

    with gr.Row():
        summary_out = gr.Textbox(label="Summary")
        trans_out = gr.Textbox(label="English Query Text", visible=False)

    gallery = gr.Gallery(label="Retrieved Images", columns=3)
    submit = gr.Button("Process", variant="primary")

    def toggle_translation(selected_language):
        """Show the translation box only when Arabic is selected."""
        return gr.update(visible=selected_language == "Arabic")

    lang.change(toggle_translation, lang, trans_out)
    submit.click(process, [text_input, lang], [summary_out, trans_out, gallery])

if __name__ == "__main__":
    demo.launch()