# InsightAI / app.py
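"""InsightAI: chat with the Global Superstore dataset in Gradio.

A quantized Llama-2 pipeline turns natural-language questions into Pandas
code, guided by retrieval over a small history of request/code pairs
(FAISS + MiniLM embeddings). The generated code is executed and any
Matplotlib figure it produces is shown next to the chat.
"""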
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.memory import ConversationBufferMemory
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.schema.runnable import RunnableLambda
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.retrieval_qa.base import RetrievalQA
import io
import contextlib
from PIL import Image
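# --- Data and retrieval setup ---
# Load the Superstore dataset and build a one-line-per-column schema string
# (name + dtype) to inject into prompts. A second CSV of past request/code
# pairs is embedded with MiniLM and indexed in FAISS for retrieval.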
df = pd.read_csv('Global_Superstore2.csv', encoding='ISO-8859-1')
schema_info = "\n".join([f"- `{col}` ({dtype})" for col, dtype in df.dtypes.items()])
history_df = pd.read_csv('sample_requests_and_code_300plus.csv')
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Store each row's position as metadata so the matching code snippet can be
# looked up in history_df after retrieval.
faiss_index = FAISS.from_texts(
    history_df['request'].tolist(),
    embeddings,
    metadatas=[{"index": i} for i in range(len(history_df))],
)
retriever = faiss_index.as_retriever()
# Load the model
model_name = "neuralmagic/Llama-2-7b-chat-quantized.w4a16"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Create a text-generation pipeline
small_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    device_map="auto",
    max_new_tokens=250,
    temperature=0.2,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id,
)
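# Wire the pipeline into LangChain: the "stuff" RetrievalQA chain concatenates
# retrieved historical requests directly into the prompt context.
# (ConversationBufferMemory is instantiated but not attached to a chain here.)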
llm = HuggingFacePipeline(pipeline=small_pipeline)
memory = ConversationBufferMemory()
retrieval_qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
def generate_prompt(user_query, schema_info):
    # Retrieve context grounded in similar historical requests, plus the single
    # closest request so its stored code can be offered as a reference.
    retrieved_docs = retrieval_qa.run(user_query)
    similar_docs = retriever.get_relevant_documents(user_query)
    similar_code = ""
    if similar_docs:
        idx = similar_docs[0].metadata.get('index', None)
        if idx is not None:
            similar_code = history_df.iloc[idx]['code']
    messages = [
        {"role": "system", "content": f"""
You are an expert data analyst. Your response MUST:
- Return ONLY valid Python Pandas code (no text, no introductions, no explanations, no extra comments).
- ⚠️ Start IMMEDIATELY with the Python code block.
- ⚡ Use proper parentheses when using logical operators (&, |) in Pandas conditions.
- Always include necessary import statements.
- ⚡ Do NOT add ANY extra lines, comments, or explanations.
{f"- Reference similar code: {similar_code}" if similar_code else ""}
"""},
        {"role": "user", "content": f"""
Dataset Schema:
{schema_info}

Context from similar requests:
{retrieved_docs}

Query: {user_query}
"""}
    ]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    return prompt
def execute_generated_code(code):
    # Run the generated snippet in an isolated namespace, capture stdout/stderr,
    # and return any Matplotlib figure it produced as a PIL image.
    local_env = {}
    output = io.StringIO()
    plt.close('all')
    with contextlib.redirect_stdout(output), contextlib.redirect_stderr(output):
        try:
            exec(code, globals(), local_env)
            if plt.get_fignums():
                buf = io.BytesIO()
                plt.savefig(buf, format='png')
                buf.seek(0)
                img = Image.open(buf)
                return img
            return None
        except Exception:
            return None
def process_query(user_query):
    # Build the prompt, run the LLM, and extract just the Python code block
    # from the raw generation.
    prompt = generate_prompt(user_query, schema_info)
    llm_chain = RunnableLambda(lambda x: llm(x["user_query"]))
    response = llm_chain.invoke({"user_query": prompt})
    generated_code = response.strip()
    if "```python" in generated_code:
        generated_code = generated_code.split("```python")[1].split("```", 1)[0].strip()
    elif "```" in generated_code:
        generated_code = generated_code.split("```", 1)[1].split("```", 1)[0].strip()
    return generated_code
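# Example (hypothetical query): process_query("Plot total Sales by Category")
# should return only the extracted Pandas/Matplotlib snippet, ready to exec.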
def gradio_chat_interface(history, query):
    # Show a placeholder while the model works, then stream back the
    # generated code and any resulting plot.
    history.append((query, "⏳ **Processing...**"))
    yield history, None, ""
    generated_code = process_query(query)
    # Save next to the app for inspection (the original '/content/...' path is Colab-specific).
    with open('generated_code.py', 'w') as f:
        f.write(generated_code)
    image = execute_generated_code(generated_code)
    history[-1] = (query, f"```python\n{generated_code}\n```")
    yield history, image, ""
with gr.Blocks() as demo:
    gr.Markdown("""
# **Interactive Pandas Chat with InsightAI** 💬
**Talk to your data, get instant answers!**
<div style="text-align: center;">
    <table style="margin: 0 auto;">
        <tr>
            <td>🔍 <strong>Explore your dataset!</strong></td>
            <td>💻 <strong>Instantly view generated Pandas code.</strong></td>
        </tr>
        <tr>
            <td>📊 <strong>Get accurate responses with RAG-enhanced retrieval.</strong></td>
            <td>📈 <strong>Live visualizations update on the right.</strong></td>
        </tr>
    </table>
</div>
""")
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Chat with RAG & Historical Context Expert")
            query_input = gr.Textbox(placeholder="Type your query and press Enter...", label="Your Query")
        with gr.Column(scale=2):
            plot_output = gr.Image(label="📊 Visualization", height=500)

    query_input.submit(
        fn=gradio_chat_interface,
        inputs=[chatbot, query_input],
        outputs=[chatbot, plot_output, query_input],
    )

demo.launch()