Spaces:

ikraamkb
/

DataVisualization

Runtime error

App Files Community

ikraamkb committed 2 days ago

Commit

fba174a

verified ·

1 Parent(s): ef25d94

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -21

app.py CHANGED Viewed

@@ -1,25 +1,37 @@
 from fastapi import FastAPI, File, UploadFile
 import pdfplumber
 import docx
 import openpyxl
 from pptx import Presentation
-import easyocr
 from transformers import pipeline
 import gradio as gr
 from fastapi.responses import RedirectResponse
-# Initialize FastAPI
 app = FastAPI()
-# Load AI Model for Question Answering
-qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large", tokenizer="google/flan-t5-large", use_fast=True)
-# Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     words = text.split()
-    return " ".join(words[:max_tokens])
-# Functions to extract text from different file formats
 def extract_text_from_pdf(pdf_file):
     text = ""
     with pdfplumber.open(pdf_file) as pdf:
@@ -53,7 +65,6 @@ def extract_text_from_image(image_file):
     result = reader.readtext(image_file)
     return " ".join([res[1] for res in result])
-# Function to answer questions based on document content
 def answer_question_from_document(file, question):
     file_ext = file.name.split(".")[-1].lower()
@@ -70,26 +81,27 @@ def answer_question_from_document(file, question):
     if not text:
         return "No text extracted from the document."
-    truncated_text = truncate_text(text)
-    input_text = f"Question: {question} Context: {truncated_text}"
     response = qa_pipeline(input_text)
-    return response[0]["generated_text"]
-# Function to answer questions based on image content
 def answer_question_from_image(image, question):
     image_text = extract_text_from_image(image)
     if not image_text:
         return "No text detected in the image."
-    truncated_text = truncate_text(image_text)
     input_text = f"Question: {question} Context: {truncated_text}"
     response = qa_pipeline(input_text)
     return response[0]["generated_text"]
-# Gradio UI for Document & Image QA
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
     inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
@@ -104,10 +116,63 @@ img_interface = gr.Interface(
     title="AI Image Question Answering"
 )
-# Mount Gradio Interfaces
-demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
-    return RedirectResponse(url="/")

 from fastapi import FastAPI, File, UploadFile
 import pdfplumber
+import pytesseract
+from PIL import Image
+import easyocr
 import docx
 import openpyxl
 from pptx import Presentation
 from transformers import pipeline
 import gradio as gr
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
 from fastapi.responses import RedirectResponse
+import io
+# ✅ Initialize FastAPI
 app = FastAPI()
+# ✅ Load AI Models
+from transformers import pipeline
+# Use the first CUDA GPU when one is available and fall back to CPU (-1) otherwise;
+# a hard-coded device=0 makes every pipeline() call fail on CPU-only hardware.
+import torch
+_DEVICE = 0 if torch.cuda.is_available() else -1
+# Text-to-text model that answers questions over extracted document/image text.
+qa_pipeline = pipeline(
+    "text2text-generation",
+    model="google/flan-t5-large",
+    tokenizer="google/flan-t5-large",
+    use_fast=True,
+    device=_DEVICE,
+)
+# Table question answering over the uploaded spreadsheet.
+table_analyzer = pipeline(
+    "table-question-answering",
+    model="google/tapas-large-finetuned-wtq",
+    tokenizer="google/tapas-large-finetuned-wtq",
+    use_fast=True,
+    device=_DEVICE,
+)
+# Free-form text generator used to draft plotting code.
+code_generator = pipeline(
+    "text-generation",
+    model="openai-community/gpt2-medium",
+    tokenizer="openai-community/gpt2-medium",
+    use_fast=True,
+    device=_DEVICE,
+)
+# NOTE(review): "Salesforce/blip-vqa-base" is a visual-question-answering checkpoint but is
+# loaded under the "image-to-text" task — confirm the intended task before using vqa_pipeline.
+vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base", device=_DEVICE)
+# ✅ Function to truncate text to at most `max_tokens` whitespace-separated words
 def truncate_text(text, max_tokens=450):
+    """Return the first `max_tokens` words of `text`, re-joined with single spaces.
+
+    Words are whatever `str.split()` yields, so runs of whitespace collapse.
+    NOTE(review): this counts words, not model tokens — confirm the limit is
+    adequate for the tokenizer in use.
+    """
     words = text.split()
+    return " ".join(words[:max_tokens])  # ✅ Keeps only the first `max_tokens` words
+# ✅ Functions for Document & Image QA
 def extract_text_from_pdf(pdf_file):
     text = ""
     with pdfplumber.open(pdf_file) as pdf:
     result = reader.readtext(image_file)
     return " ".join([res[1] for res in result])
 def answer_question_from_document(file, question):
     file_ext = file.name.split(".")[-1].lower()
     if not text:
         return "No text extracted from the document."
+    truncated_text = truncate_text(text)  # ✅ Prevents token limit error
+    input_text = f"Question: {question} Context: {truncated_text}"  # ✅ Proper FLAN-T5 format
     response = qa_pipeline(input_text)
+    return response[0]["generated_text"]  # ✅ Returns the correct output
 def answer_question_from_image(image, question):
+    """Answer `question` using the text extracted from `image`.
+
+    The text comes from `extract_text_from_image`; it is shortened with
+    `truncate_text` and sent to `qa_pipeline` as a
+    "Question: ... Context: ..." prompt.
+
+    Returns the first result's "generated_text", or a fixed message when no
+    text is extracted from the image.
+    """
     image_text = extract_text_from_image(image)
     if not image_text:
         return "No text detected in the image."
+    truncated_text = truncate_text(image_text)  # ✅ Prevents token limit error
     input_text = f"Question: {question} Context: {truncated_text}"
     response = qa_pipeline(input_text)
     return response[0]["generated_text"]
+# ✅ Gradio UI for Document & Image QA
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
     inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
     title="AI Image Question Answering"
 )
+# ✅ Data Visualization Function
+def generate_visualization(excel_file, viz_type, user_request):
+    """Generate plotting code for an uploaded Excel file and render the resulting chart.
+
+    Args:
+        excel_file: Uploaded spreadsheet; passed straight to ``pd.read_excel``.
+        viz_type: Chart type label chosen in the UI (e.g. "Bar Chart").
+        user_request: Free-text description of the desired visualization.
+
+    Returns:
+        A ``(code, image)`` tuple. ``code`` is the generated Python source (with any
+        execution failure appended as a trailing ``#`` comment) or an ``"Error: ..."``
+        message. ``image`` is a ``PIL.Image.Image`` of the rendered figure, or ``None``
+        when nothing could be rendered.
+    """
+    try:
+        df = pd.read_excel(excel_file)
+        # The table model gets an all-string copy. Fill missing cells BEFORE casting:
+        # astype(str) would turn NaN into the literal string "nan", leaving nothing
+        # for fillna to replace. `df` itself keeps its dtypes for plotting.
+        table_df = df.fillna("").astype(str)
+        # An empty or non-string request falls back to a generic question.
+        query = user_request.strip() if isinstance(user_request, str) else ""
+        table_input = {
+            "table": table_df.to_dict(orient="records"),
+            "query": query or "What is the summary?",
+        }
+        # NOTE(review): the table answer is not used further down; the call is kept so
+        # that table-analysis failures are still reported as before.
+        table_answer = table_analyzer(**table_input)
+        # The chart type may be unset in the UI; avoid calling .lower() on None.
+        chart_kind = viz_type.lower() if isinstance(viz_type, str) else "chart"
+        prompt = (
+            f"Given a dataset with columns {list(df.columns)}, generate Python code using Matplotlib and Seaborn "
+            f"to create a {chart_kind} based on: {user_request}. Only return valid Python code, no explanations."
+        )
+        # return_full_text=False: by default the generator echoes the prompt in front
+        # of the continuation, and that English sentence can never run as Python.
+        code_response = code_generator(prompt, max_new_tokens=150, do_sample=True, return_full_text=False)
+        if not (isinstance(code_response, list) and code_response and "generated_text" in code_response[0]):
+            # Report the problem instead of trying to execute an error message.
+            return "Error: Model did not return valid code.", None
+        generated_code = code_response[0]["generated_text"]
+    except Exception as e:
+        return f"Error: {str(e)}", None
+
+    try:
+        # SECURITY(review): this executes model-generated code in the server process
+        # with no sandboxing; flagged for follow-up, behaviour intentionally unchanged.
+        exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
+        exec(generated_code, exec_globals)
+        img_buf = io.BytesIO()
+        plt.gcf().savefig(img_buf, format="png")
+        img_buf.seek(0)
+        # The image output is given a fully loaded PIL image rather than a raw buffer.
+        chart = Image.open(img_buf)
+        chart.load()
+    except Exception as e:
+        # Keep the error next to the code; the image slot only ever gets an image or None.
+        return f"{generated_code}\n\n# Error in executing visualization: {str(e)}", None
+    finally:
+        # Close figures on failure too so they do not pile up across requests.
+        plt.close("all")
+    return generated_code, chart
+# ✅ Gradio UI for Data Visualization
+# The three inputs map positionally onto
+# generate_visualization(excel_file, viz_type, user_request); the two outputs
+# receive the pair of values that function returns.
+viz_interface = gr.Interface(
+    generate_visualization,
+    [
+        gr.File(label="Upload Excel File"),
+        gr.Radio(
+            choices=["Bar Chart", "Line Chart", "Scatter Plot", "Histogram"],
+            label="Choose Visualization Type",
+        ),
+        gr.Textbox(label="Enter Visualization Request"),
+    ],
+    [
+        gr.Code(label="Generated Python Code"),
+        gr.Image(label="Visualization Output"),
+    ],
+    title="AI-Powered Data Visualization",
+)
+# ✅ Mount Gradio Interfaces
+# Tab names must line up one-to-one with the interface list; three interfaces were
+# previously given only two names, leaving the visualization tab without a label.
+demo = gr.TabbedInterface(
+    [doc_interface, img_interface, viz_interface],
+    ["Document QA", "Image QA", "Data Visualization"],
+)
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
+    # NOTE(review): this redirects "/" to "/" itself, and the Gradio app is already
+    # mounted at path="/" on the line above — confirm whether this route is
+    # reachable or still needed.
+    return RedirectResponse(url="/")