# InsightAI / app.py
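"""InsightAI: chat with the Global Superstore dataset in Gradio.

A quantized Llama-2 pipeline turns natural-language questions into Pandas
code, guided by retrieval over a small history of request/code pairs
(FAISS + MiniLM embeddings). The generated code is executed and any
Matplotlib figure it produces is shown next to the chat.
"""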
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.memory import ConversationBufferMemory
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.schema.runnable import RunnableLambda
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.retrieval_qa.base import RetrievalQA
import io
import contextlib
from PIL import Image
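# --- Data and retrieval setup ---
# Load the Superstore dataset and build a one-line-per-column schema string
# (name + dtype) to inject into prompts. A second CSV of past request/code
# pairs is embedded with MiniLM and indexed in FAISS for retrieval.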
df = pd.read_csv('Global_Superstore2.csv', encoding='ISO-8859-1')
schema_info = "\n".join([f"- `{col}` ({dtype})" for col, dtype in df.dtypes.items()])
history_df = pd.read_csv('sample_requests_and_code_300plus.csv')
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Store each row's position as metadata so the matching code snippet can be
# looked up in history_df after retrieval.
faiss_index = FAISS.from_texts(
    history_df['request'].tolist(),
    embeddings,
    metadatas=[{"index": i} for i in range(len(history_df))],
)
retriever = faiss_index.as_retriever()
# Load the model
model_name = "neuralmagic/Llama-2-7b-chat-quantized.w4a16"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Create a text-generation pipeline
small_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    device_map="auto",
    max_new_tokens=250,
    temperature=0.2,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id,
)
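# Wire the pipeline into LangChain: the "stuff" RetrievalQA chain concatenates
# retrieved historical requests directly into the prompt context.
# (ConversationBufferMemory is instantiated but not attached to a chain here.)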
llm = HuggingFacePipeline(pipeline=small_pipeline)
memory = ConversationBufferMemory()
retrieval_qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
def generate_prompt(user_query, schema_info):
    # Retrieve context grounded in similar historical requests, plus the single
    # closest request so its stored code can be offered as a reference.
    retrieved_docs = retrieval_qa.run(user_query)
    similar_docs = retriever.get_relevant_documents(user_query)
    similar_code = ""
    if similar_docs:
        idx = similar_docs[0].metadata.get('index', None)
        if idx is not None:
            similar_code = history_df.iloc[idx]['code']
    messages = [
        {"role": "system", "content": f"""
You are an expert data analyst. Your response MUST:
- Return ONLY valid Python Pandas code (no text, no introductions, no explanations, no extra comments).
- ⚠️ Start IMMEDIATELY with the Python code block.
- ⚡ Use proper parentheses when using logical operators (&, |) in Pandas conditions.
- Always include necessary import statements.
- ⚡ Do NOT add ANY extra lines, comments, or explanations.
{f"- Reference similar code: {similar_code}" if similar_code else ""}
"""},
        {"role": "user", "content": f"""
Dataset Schema:
{schema_info}

Context from similar requests:
{retrieved_docs}

Query: {user_query}
"""}
    ]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    return prompt
def execute_generated_code(code):
    # Run the generated snippet in an isolated namespace, capture stdout/stderr,
    # and return any Matplotlib figure it produced as a PIL image.
    local_env = {}
    output = io.StringIO()
    plt.close('all')
    with contextlib.redirect_stdout(output), contextlib.redirect_stderr(output):
        try:
            exec(code, globals(), local_env)
            if plt.get_fignums():
                buf = io.BytesIO()
                plt.savefig(buf, format='png')
                buf.seek(0)
                img = Image.open(buf)
                return img
            return None
        except Exception:
            return None
def process_query(user_query):
    # Build the prompt, run the LLM, and extract just the Python code block
    # from the raw generation.
    prompt = generate_prompt(user_query, schema_info)
    llm_chain = RunnableLambda(lambda x: llm(x["user_query"]))
    response = llm_chain.invoke({"user_query": prompt})
    generated_code = response.strip()
    if "```python" in generated_code:
        generated_code = generated_code.split("```python")[1].split("```", 1)[0].strip()
    elif "```" in generated_code:
        generated_code = generated_code.split("```", 1)[1].split("```", 1)[0].strip()
    return generated_code
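# Example (hypothetical query): process_query("Plot total Sales by Category")
# should return only the extracted Pandas/Matplotlib snippet, ready to exec.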
def gradio_chat_interface(history, query):
    # Show a placeholder while the model works, then stream back the
    # generated code and any resulting plot.
    history.append((query, "⏳ **Processing...**"))
    yield history, None, ""
    generated_code = process_query(query)
    # Save next to the app for inspection (the original '/content/...' path is Colab-specific).
    with open('generated_code.py', 'w') as f:
        f.write(generated_code)
    image = execute_generated_code(generated_code)
    history[-1] = (query, f"```python\n{generated_code}\n```")
    yield history, image, ""
with gr.Blocks() as demo:
    gr.Markdown("""
# **Interactive Pandas Chat with InsightAI** 💬
**Talk to your data, get instant answers!**
<div style="text-align: center;">
    <table style="margin: 0 auto;">
        <tr>
            <td>🔍 <strong>Explore your dataset!</strong></td>
            <td>💻 <strong>Instantly view generated Pandas code.</strong></td>
        </tr>
        <tr>
            <td>📊 <strong>Get accurate responses with RAG-enhanced retrieval.</strong></td>
            <td>📈 <strong>Live visualizations update on the right.</strong></td>
        </tr>
    </table>
</div>
""")
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Chat with RAG & Historical Context Expert")
            query_input = gr.Textbox(placeholder="Type your query and press Enter...", label="Your Query")
        with gr.Column(scale=2):
            plot_output = gr.Image(label="📊 Visualization", height=500)

    query_input.submit(
        fn=gradio_chat_interface,
        inputs=[chatbot, query_input],
        outputs=[chatbot, plot_output, query_input],
    )

demo.launch()