ikraamkb committed on
Commit
324caeb
·
verified ·
1 Parent(s): 12d5298

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -118
app.py CHANGED
@@ -1,131 +1,150 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- from fastapi import FastAPI
 
 
 
 
6
  from transformers import pipeline
 
7
  from fastapi.responses import RedirectResponse
8
- import io
9
- import ast
10
- from PIL import Image
11
- import re
12
-
13
- # ✅ Load AI models
14
- print("πŸš€ Initializing application...")
15
- table_analyzer = pipeline("table-question-answering", model="google/tapas-base-finetuned-wtq", device=-1)
16
- code_generator = pipeline("text-generation", model="EleutherAI/gpt-neo-125M", device=-1)
17
- print("βœ… AI models loaded successfully!")
18
 
19
- # ✅ Initialize FastAPI
20
  app = FastAPI()
21
 
22
- def generate_visualization(excel_file, viz_type, user_request):
23
- """Generates Python visualization code and insights based on user requests and Excel data."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  try:
25
- print("πŸ“‚ Loading Excel file...")
26
- df = pd.read_excel(excel_file)
27
- print("βœ… File loaded successfully! Columns:", df.columns)
28
-
29
- # Convert date columns
30
- for col in df.select_dtypes(include=["object", "datetime64"]):
31
- try:
32
- df[col] = pd.to_datetime(df[col], errors='coerce').dt.strftime('%Y-%m-%d %H:%M:%S')
33
- except Exception:
34
- pass
35
-
36
- df = df.fillna(0) # Fill NaN values
37
-
38
- formatted_table = [{col: str(value) for col, value in row.items()} for row in df.to_dict(orient="records")]
39
- print(f"πŸ“Š Formatted table: {formatted_table[:5]}")
40
- print(f"πŸ” User request: {user_request}")
41
-
42
- if not isinstance(user_request, str):
43
- raise ValueError("User request must be a string")
44
-
45
- print("🧠 Sending data to TAPAS model for analysis...")
46
- table_answer = table_analyzer({"table": formatted_table, "query": user_request})
47
- print("βœ… Table analysis completed!")
48
-
49
- # ✅ AI-generated code
50
- prompt = f"""
51
- Generate a **valid** Python Matplotlib script using the DataFrame `df` to visualize:
52
- - Columns: {list(df.columns)}
53
- - Visualization type: {viz_type}
54
- - User request: {user_request}
55
-
56
- Requirements:
57
- - Use `df` directly without reloading it.
58
- - Always include `plt.show()` at the end.
59
- - Ensure proper syntax (no missing imports or undefined variables).
60
- - Generate **only** the code (no extra text).
61
- """
62
-
63
-
64
- print("πŸ€– Sending request to AI code generator...")
65
- generated_code = code_generator(prompt, max_length=200)[0]['generated_text']
66
- print("πŸ“ AI-generated code:")
67
- print(generated_code)
68
-
69
- # ✅ Validate generated code
70
- valid_syntax = re.match(r".*plt\.show\(\).*", generated_code, re.DOTALL)
71
- if not valid_syntax:
72
- print("⚠️ AI code generation failed! Using fallback visualization...")
73
- return generated_code, "Error: The AI did not generate a valid Matplotlib script."
74
-
75
- try:
76
- ast.parse(generated_code) # Syntax validation
77
- except SyntaxError as e:
78
- return generated_code, f"Syntax error: {e}"
79
-
80
- # ✅ Execute AI-generated code
81
- try:
82
- print("⚑ Executing AI-generated code...")
83
- exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df.copy(), "io": io}
84
- exec(generated_code, exec_globals)
85
-
86
- fig = plt.gcf()
87
- img_buf = io.BytesIO()
88
- fig.savefig(img_buf, format='png')
89
- img_buf.seek(0)
90
- plt.close(fig)
91
- except Exception as e:
92
- print(f"❌ Error executing AI-generated code: {str(e)}")
93
- return generated_code, f"Error executing visualization: {str(e)}"
94
-
95
- img = Image.open(img_buf)
96
- return generated_code, img
97
 
 
 
 
 
 
 
 
 
 
98
  except Exception as e:
99
- print(f"❌ An error occurred: {str(e)}")
100
- return f"Error: {str(e)}", "Table analysis failed."
101
-
102
- # ✅ Gradio UI setup
103
- print("πŸ› οΈ Setting up Gradio interface...")
104
- gradio_ui = gr.Interface(
105
- fn=generate_visualization,
106
- inputs=[
107
- gr.File(label="Upload Excel File"),
108
- gr.Radio([
109
- "Bar Chart", "Line Chart", "Scatter Plot", "Histogram",
110
- "Boxplot", "Heatmap", "Pie Chart", "Area Chart", "Bubble Chart", "Violin Plot"
111
- ], label="Select Visualization Type"),
112
- gr.Textbox(label="Enter visualization request (e.g., 'Sales trend over time')")
113
- ],
114
- outputs=[
115
- gr.Code(label="Generated Python Code"),
116
- gr.Image(label="Visualization Result")
117
- ],
118
- title="AI-Powered Data Visualization πŸ“Š",
119
- description="Upload an Excel file, choose your visualization type, and ask a question about your data!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  )
121
- print("βœ… Gradio interface configured successfully!")
122
 
123
- # ✅ Mount Gradio app
124
- print("πŸ”— Mounting Gradio interface on FastAPI...")
125
- app = gr.mount_gradio_app(app, gradio_ui, path="/")
126
- print("βœ… Gradio interface mounted successfully!")
127
 
128
  @app.get("/")
129
  def home():
130
- print("🏠 Redirecting to UI...")
131
- return RedirectResponse(url="/")
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ import fitz # PyMuPDF for PDF parsing
3
+ from tika import parser # Apache Tika for document parsing
4
+ import openpyxl
5
+ from pptx import Presentation
6
+ import torch
7
+ from torchvision import transforms
8
+ from torchvision.models.detection import fasterrcnn_resnet50_fpn
9
+ from PIL import Image
10
  from transformers import pipeline
11
+ import gradio as gr
12
  from fastapi.responses import RedirectResponse
13
+ import numpy as np
14
+ import easyocr
 
 
 
 
 
 
 
 
15
 
16
# Initialize FastAPI
app = FastAPI()

# Text-generation pipeline used to answer questions (DeepSeek-V2-Chat).
# NOTE(review): this checkpoint is very large and may need
# `trust_remote_code=True` to load - confirm against the model card before
# deploying; it is loaded at import time, so startup blocks on the download.
qa_pipeline = pipeline("text-generation", model="deepseek-ai/DeepSeek-V2-Chat")

# Pretrained Faster R-CNN object detector, switched to inference mode.
# NOTE(review): nothing visible in this file calls `model`; confirm it is
# still needed before paying for the weight download.
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# OCR reader. It is created eagerly at import time (not lazily); only ask for
# the GPU when CUDA is actually available instead of hard-coding gpu=True.
reader = easyocr.Reader(["en"], gpu=torch.cuda.is_available())

# Image transformations: convert a PIL image / ndarray to a torch tensor.
transform = transforms.Compose([
    transforms.ToTensor(),
])
33
+
34
# Allowed File Extensions
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}


def validate_file_type(file):
    """Check that an upload has one of the supported document extensions.

    Args:
        file: The upload to check. Either a path string or an object whose
            ``name`` attribute holds the file name or path.

    Returns:
        ``None`` when the extension (case-insensitive) is in
        ``ALLOWED_EXTENSIONS``; otherwise a human-readable error message.
    """
    import os

    if file is None:
        return "No file uploaded."

    # Accept both plain path strings and upload objects exposing `.name`.
    name = str(getattr(file, "name", file))

    # Look only at the final path component so a dot in a directory name
    # (e.g. "/data.v2/README") is not mistaken for an extension separator.
    _, dot, ext = os.path.basename(name).rpartition(".")
    if not dot:
        return "Unsupported file format: (no extension)"

    ext = ext.lower()
    if ext not in ALLOWED_EXTENSIONS:
        return f"Unsupported file format: {ext}"
    return None
42
+
43
+ # Function to truncate text to 450 tokens
44
def truncate_text(text, max_tokens=450):
    """Return at most the first ``max_tokens`` words of ``text``.

    "Tokens" here are whitespace-delimited words, not model-tokenizer tokens.
    Runs of whitespace are collapsed to single spaces in the result.

    Args:
        text: The text to shorten. ``None`` or an empty string yields ``""``.
        max_tokens: Maximum number of words to keep. Zero or a negative
            value yields ``""``.

    Returns:
        The leading words of ``text`` joined by single spaces.
    """
    if not text or max_tokens <= 0:
        return ""
    # Bound the split so a very large document is not broken into words in
    # full when only the first `max_tokens` of them are kept.
    words = text.split(None, max_tokens)[:max_tokens]
    return " ".join(words)
47
+
48
+ # Document Text Extraction Functions
49
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: A path (``str`` / ``os.PathLike``) or an upload object
            whose ``name`` attribute holds the path of the file on disk.

    Returns:
        The page texts joined by newlines, ``"No text found."`` when the
        document yields only whitespace, or ``"Error reading PDF: ..."``
        when opening or parsing fails (errors are reported, not raised).
    """
    import os

    try:
        # Upload objects are not paths themselves; use their `.name`.
        if isinstance(pdf_file, (str, os.PathLike)):
            path = pdf_file
        else:
            path = getattr(pdf_file, "name", pdf_file)

        # The context manager closes the document even if a page fails.
        with fitz.open(path) as doc:
            text = "\n".join(page.get_text("text") for page in doc)

        # Pages without text still contribute newlines via the join, so
        # test the stripped text rather than the raw string.
        return text if text.strip() else "No text found."
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
56
+
57
def extract_text_with_tika(file):
    """Extract text from a document using Apache Tika.

    Args:
        file: The buffer handed to ``parser.from_buffer``.

    Returns:
        The stripped text content, ``"No text found."`` when Tika reports no
        content, or ``"Error reading document: ..."`` when parsing fails
        (errors are reported, not raised).
    """
    try:
        parsed = parser.from_buffer(file)
        # The "content" key may be present with a None value, in which case
        # a `.get()` default is not used; coalesce before stripping.
        content = (parsed.get("content") or "").strip()
        return content or "No text found."
    except Exception as e:
        return f"Error reading document: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
def extract_text_from_pptx(pptx_file):
    """Collect the text of every text-bearing shape in a PowerPoint file.

    Args:
        pptx_file: Whatever ``Presentation`` accepts as its source.

    Returns:
        The shape texts joined by newlines, ``"No text found."`` when no
        shape exposes a ``text`` attribute, or ``"Error reading PPTX: ..."``
        when loading or reading fails.
    """
    try:
        deck = Presentation(pptx_file)
        # Walk slides in order, keeping only shapes that expose `.text`.
        fragments = [
            shape.text
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
        if not fragments:
            return "No text found."
        return "\n".join(fragments)
    except Exception as e:
        return f"Error reading PPTX: {str(e)}"
75
+
76
def extract_text_from_excel(excel_file):
    """Flatten every worksheet of an Excel workbook into plain text.

    Each non-empty row becomes one line, with its non-empty cell values
    converted to strings and separated by spaces.

    Args:
        excel_file: A path (``str`` / ``os.PathLike``) or an upload object
            whose ``name`` attribute holds the path of the file on disk.

    Returns:
        The row lines joined by newlines, ``"No text found."`` when the
        workbook has no non-empty cells, or ``"Error reading Excel: ..."``
        when loading or reading fails (errors are reported, not raised).
    """
    import os

    try:
        if isinstance(excel_file, (str, os.PathLike)):
            source = excel_file
        else:
            source = getattr(excel_file, "name", excel_file)

        wb = openpyxl.load_workbook(source, read_only=True)
        try:
            lines = []
            for sheet in wb.worksheets:
                for row in sheet.iter_rows(values_only=True):
                    # Skip empty cells so they do not appear as the literal
                    # text "None" in the extracted content.
                    cells = [str(value) for value in row if value is not None]
                    if cells:
                        lines.append(" ".join(cells))
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            wb.close()

        return "\n".join(lines) if lines else "No text found."
    except Exception as e:
        return f"Error reading Excel: {str(e)}"
86
+
87
def extract_text_from_image(image_file):
    """Run OCR on an image and return the recognised text.

    Args:
        image_file: The image to read. May be a numpy array, a PIL image,
            or anything ``Image.open`` accepts (path or file object).

    Returns:
        The recognised text fragments joined by spaces,
        ``"No text found."`` when OCR returns nothing, or
        ``"No meaningful content detected in the image."`` when no image is
        given or the pixel values have a standard deviation below 10.
    """
    if image_file is None:
        return "No meaningful content detected in the image."

    # Normalise the accepted input kinds to a PIL image before converting.
    if isinstance(image_file, np.ndarray):
        image = Image.fromarray(image_file)
    elif isinstance(image_file, Image.Image):
        image = image_file
    else:
        image = Image.open(image_file)

    # Build the pixel array once and reuse it for both the check and OCR.
    pixels = np.array(image.convert("RGB"))
    if pixels.std() < 10:  # Low contrast = likely empty
        return "No meaningful content detected in the image."

    result = reader.readtext(pixels)
    return " ".join(res[1] for res in result) if result else "No text found."
94
+
95
+ # Function to answer questions based on document content
96
def answer_question_from_document(file, question):
    """Answer a question using the text of an uploaded document.

    The document is validated, its text is extracted with the extractor
    matching its extension, the text is truncated, and the question plus
    truncated text are sent to ``qa_pipeline``.

    Args:
        file: The uploaded document; a path string or an object whose
            ``name`` attribute holds the file path.
        question: The question to ask about the document.

    Returns:
        The generated answer, or a message describing why no answer could
        be produced (missing upload, unsupported format, extraction error,
        or no text in the document).
    """
    if file is None:
        return "No file uploaded."

    validation_error = validate_file_type(file)
    if validation_error:
        return validation_error

    file_ext = str(getattr(file, "name", file)).split(".")[-1].lower()
    if file_ext == "pdf":
        text = extract_text_from_pdf(file)
    elif file_ext in ("docx", "pptx"):
        # NOTE(review): pptx goes through Tika although a dedicated
        # extract_text_from_pptx helper exists in this file - confirm intent.
        text = extract_text_with_tika(file)
    elif file_ext == "xlsx":
        text = extract_text_from_excel(file)
    else:
        return "Unsupported file format!"

    if not text or not text.strip():
        return "No text extracted from the document."

    # The extractors report failures as strings instead of raising. Surface
    # those to the user directly rather than feeding them to the model as if
    # they were document content.
    if text == "No text found." or text.startswith("Error reading"):
        return text

    truncated_text = truncate_text(text)
    # return_full_text=False keeps the prompt (question + context) out of
    # the returned answer.
    response = qa_pipeline(
        f"Question: {question}\nContext: {truncated_text}",
        return_full_text=False,
    )

    return response[0]["generated_text"]
118
+
119
def answer_question_from_image(image, question):
    """Answer a question using the text recognised in an uploaded image.

    Args:
        image: The uploaded image, passed on to ``extract_text_from_image``.
        question: The question to ask about the image's text.

    Returns:
        The generated answer, or a message explaining that the image held
        no usable content or text.
    """
    if image is None:
        return "No meaningful content detected in the image."

    image_text = extract_text_from_image(image)
    if not image_text:
        return "No meaningful content detected in the image."

    # The extractor signals "nothing usable" with these fixed strings;
    # return them as-is instead of sending them to the model as context.
    if image_text in (
        "No meaningful content detected in the image.",
        "No text found.",
    ):
        return image_text

    truncated_text = truncate_text(image_text)
    # return_full_text=False keeps the prompt (question + context) out of
    # the returned answer.
    response = qa_pipeline(
        f"Question: {question}\nContext: {truncated_text}",
        return_full_text=False,
    )

    return response[0]["generated_text"]
128
+
129
# Gradio UI for Document & Image QA
doc_interface = gr.Interface(
    fn=answer_question_from_document,
    inputs=[gr.File(label="Upload Document"), gr.Textbox(label="Ask a Question")],
    outputs="text",
    title="AI Document Question Answering",
)

img_interface = gr.Interface(
    fn=answer_question_from_image,
    # Deliver the upload as a file path: the handler opens it with PIL's
    # Image.open, which cannot read the component's default array value.
    inputs=[
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(label="Ask a Question"),
    ],
    outputs="text",
    title="AI Image Question Answering",
)
 
143
 
144
# Mount Gradio Interfaces
# Both interfaces are combined into one tabbed UI, which is then mounted on
# the FastAPI app at the root path.
demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"])
app = gr.mount_gradio_app(app, demo, path="/")


# NOTE(review): this route answers "/" by redirecting to "/" again. With the
# Gradio app already mounted at "/" above, it looks like this handler is never
# reached (and would loop if it were) - confirm whether it can be dropped.
@app.get("/")
def home():
    """Redirect a request for the root path to "/"."""
    return RedirectResponse(url="/")