Spaces:

ikraamkb
/

Summarization

Sleeping

App Files Files Community

ikraamkb committed on Apr 25

Commit

1795a1a

verified ·

1 Parent(s): 0d83986

Update appImage.py

Browse files

Files changed (1) hide show

appImage.py +106 -3

appImage.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import torch
@@ -21,7 +21,7 @@ except Exception as e:
     USE_GIT = False
 def generate_caption(image_path):
-    """Generate caption using the best available model"""
     try:
         if USE_GIT:
             image = Image.open(image_path)
@@ -36,7 +36,7 @@ def generate_caption(image_path):
         return "Could not generate caption"
 def process_image(file_path: str):
-    """Handle image processing for Gradio interface"""
     if not file_path:
         return "Please upload an image first"
@@ -71,3 +71,106 @@ app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def redirect_to_interface():
     return RedirectResponse(url="/")

+"""import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import torch
     USE_GIT = False
 def generate_caption(image_path):
+    "Generate caption using the best available model""
     try:
         if USE_GIT:
             image = Image.open(image_path)
         return "Could not generate caption"
 def process_image(file_path: str):
+    "Handle image processing for Gradio interface"
     if not file_path:
         return "Please upload an image first"
 @app.get("/")
 def redirect_to_interface():
     return RedirectResponse(url="/")
+"""
+import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
+from PIL import Image
+import torch
+from fastapi import FastAPI, UploadFile, Form
+from fastapi.responses import RedirectResponse, JSONResponse, FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+import os
+import tempfile
+# ✅ Initialize FastAPI
+app = FastAPI()
+# ✅ Enable CORS (so frontend JS can call backend)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ✅ Load caption model
+USE_GIT = False
+try:
+    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+    git_model.eval()
+    USE_GIT = True
+except Exception as e:
+    print(f"[INFO] Falling back to ViT: {e}")
+    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+# ✅ Image captioning logic
+def generate_caption(image_path: str) -> str:
+    try:
+        if USE_GIT:
+            image = Image.open(image_path).convert("RGB")
+            inputs = processor(images=image, return_tensors="pt")
+            outputs = git_model.generate(**inputs, max_length=50)
+            caption = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+        else:
+            result = captioner(image_path)
+            caption = result[0]['generated_text']
+        return caption
+    except Exception as e:
+        return f"Error: {str(e)}"
+# ✅ For Gradio demo
+def process_image(file_path: str):
+    if not file_path:
+        return "Please upload an image."
+    return f"📷 Image Caption:\n{generate_caption(file_path)}"
+# ✅ FastAPI endpoint for frontend POSTs
+@app.post("/imagecaption/")
+async def caption_from_frontend(file: UploadFile, question: str = Form("")):
+    try:
+        # Save temp image
+        contents = await file.read()
+        tmp_path = os.path.join(tempfile.gettempdir(), file.filename)
+        with open(tmp_path, "wb") as f:
+            f.write(contents)
+        caption = generate_caption(tmp_path)
+        # Optionally generate audio
+        from gtts import gTTS
+        audio_path = os.path.join(tempfile.gettempdir(), file.filename + ".mp3")
+        tts = gTTS(text=caption)
+        tts.save(audio_path)
+        return {
+            "answer": caption,
+            "audio": f"/files/{os.path.basename(audio_path)}"
+        }
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+# ✅ Serve static files
+@app.get("/files/{file_name}")
+async def serve_file(file_name: str):
+    path = os.path.join(tempfile.gettempdir(), file_name)
+    if os.path.exists(path):
+        return FileResponse(path)
+    return JSONResponse({"error": "File not found"}, status_code=404)
+# ✅ Mount Gradio demo for test
+# Minimal Gradio UI: one image input, one caption output, one button.
+with gr.Blocks(title="🖼️ Image Captioning") as demo:
+    gr.Markdown("# 🖼️ Image Captioning Demo")
+    # type="filepath" — presumably gives process_image a path string, matching
+    # its file_path parameter; confirm against the Gradio version in use.
+    image_input = gr.Image(type="filepath", label="Upload Image")
+    result_box = gr.Textbox(label="Caption")
+    btn = gr.Button("Generate Caption")
+    # Clicking the button runs process_image on the image and shows the result.
+    btn.click(fn=process_image, inputs=[image_input], outputs=[result_box])
+# Mount the demo on the FastAPI app at the root path and rebind `app` to the
+# value mount_gradio_app returns.
+app = gr.mount_gradio_app(app, demo, path="/")
+# ✅ Optional root redirect to frontend
+@app.get("/")
+def redirect_to_frontend():
+    return RedirectResponse(url="/templates/home.html")