Spaces:

ikraamkb
/

qtAnswering

Running

App Files Community

ikraamkb committed on Mar 28

Commit

1cd6a53

verified ·

1 Parent(s): 3b3ce11

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -139

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""import gradio as gr
 import uvicorn
 import numpy as np
 import fitz  # PyMuPDF
@@ -119,141 +119,3 @@ def home():
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)
-import gradio as gr
-import uvicorn
-import numpy as np
-import pymupdf
-import tika
-import torch
-from fastapi import FastAPI
-from transformers import pipeline
-from PIL import Image
-from io import BytesIO
-from starlette.responses import RedirectResponse
-from tika import parser
-from openpyxl import load_workbook
-import os
-# Initialize Tika for DOCX & PPTX parsing
-tika.initVM()
-# Initialize FastAPI
-app = FastAPI()
-# Load models
-device = "cuda" if torch.cuda.is_available() else "cpu"
-qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=device)
-image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
-# ✅ Function to Validate File Type
-def validate_file_type(file):
-    if isinstance(file, str):  # If it's text input (NamedString)
-        return None
-    if hasattr(file, "name") and file.name:
-        ext = file.name.split(".")[-1].lower()
-        if ext not in ALLOWED_EXTENSIONS:
-            return f"❌ Unsupported file format: {ext}"
-        return None
-    return "❌ Invalid file format!"
-# ✅ Extract Text from PDF
-def extract_text_from_pdf(pdf_bytes):
-    doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
-    return "\n".join([page.get_text() for page in doc])
-# ✅ Extract Text from DOCX & PPTX using Tika
-def extract_text_with_tika(file_bytes):
-    parsed = parser.from_buffer(file_bytes)
-    return parsed.get("content", "").strip() if parsed else ""
-# ✅ Extract Text from Excel
-def extract_text_from_excel(file_path):
-    wb = load_workbook(file_path, data_only=True)
-    text = []
-    for sheet in wb.worksheets:
-        for row in sheet.iter_rows(values_only=True):
-            text.append(" ".join(str(cell) for cell in row if cell))
-    return "\n".join(text)
-# ✅ Truncate Long Text for Model
-def truncate_text(text, max_length=2048):
-    return text[:max_length] if len(text) > max_length else text
-# ✅ Answer Questions from Image or Document
-def answer_question(file, question: str):
-    # 🖼️ Handle Image Input (Gradio sends NumPy arrays)
-    if isinstance(file, np.ndarray):
-        image = Image.fromarray(file)
-        caption = image_captioning_pipeline(image)[0]['generated_text']
-        response = qa_pipeline(f"Question: {question}\nContext: {caption}")
-        return response[0]["generated_text"]
-    # Validate File
-    validation_error = validate_file_type(file)
-    if validation_error:
-        return validation_error
-    file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
-    # 🛠️ Fix: Read File Bytes Correctly (Gradio Provides File Path)
-    try:
-        with open(file.name, "rb") as f:
-            file_bytes = f.read()
-    except Exception as e:
-        return f"❌ Error reading file: {str(e)}"
-    if not file_bytes:
-        return "❌ Could not read file content!"
-    # 📝 Extract Text from Supported Documents
-    if file_ext == "pdf":
-        text = extract_text_from_pdf(file_bytes)
-    elif file_ext in ["docx", "pptx"]:
-        text = extract_text_with_tika(file_bytes)
-    elif file_ext == "xlsx":
-        text = extract_text_from_excel(file.name)
-    else:
-        return "❌ Unsupported file format!"
-    if not text.strip():
-        return "⚠️ No text extracted from the document."
-    # 🔥 Run Model on Extracted Text
-    truncated_text = truncate_text(text)
-    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
-    return response[0]["generated_text"]
-# ✅ Gradio Interface (Unified for Images & Documents)
-with gr.Blocks() as demo:
-    gr.Markdown("## 📄 AI-Powered Document & Image QA")
-    with gr.Row():
-        file_input = gr.File(label="Upload Document / Image")
-        question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
-    answer_output = gr.Textbox(label="Answer")
-    submit_btn = gr.Button("Get Answer")
-    submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
-# ✅ Mount Gradio with FastAPI
-app = gr.mount_gradio_app(app, demo, path="/")
-@app.get("/")
-def home():
-    return RedirectResponse(url="/")
-# ✅ Run FastAPI + Gradio
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-"""
-import subprocess
-# Check installed packages
-subprocess.run(["pip", "list"])
-# Check for dependency conflicts
-subprocess.run(["pip", "check"])

+import gradio as gr
 import uvicorn
 import numpy as np
 import fitz  # PyMuPDF
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)