GloryIX committed on
Commit
66211f5
·
verified ·
1 Parent(s): 16f1c26

Create app.py

Files changed (1)
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import gradio as gr
+ from sklearn.metrics.pairwise import cosine_similarity
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ from langchain.memory import ConversationBufferMemory
+ from langchain.llms.huggingface_pipeline import HuggingFacePipeline
+ from langchain.schema.runnable import RunnableLambda
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.chains.retrieval_qa.base import RetrievalQA
+ import io
+ import contextlib
+ from PIL import Image
+ import unittest
+ from unittest.mock import patch
+
+ # Load the dataset and summarize its schema for prompting
+ df = pd.read_csv('/content/global-super-store-dataset/Global_Superstore2.csv', encoding='ISO-8859-1')
+ schema_info = "\n".join([f"- `{col}` ({dtype})" for col, dtype in df.dtypes.items()])
+
+ # Index past request/code pairs; store each row index as metadata so the
+ # matching code snippet can be looked up again in generate_prompt()
+ history_df = pd.read_csv('/content/sample_requests_and_code_300plus.csv')
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+ faiss_index = FAISS.from_texts(
+     history_df['request'].tolist(),
+     embeddings,
+     metadatas=[{"index": i} for i in range(len(history_df))],
+ )
+ retriever = faiss_index.as_retriever()
+
+
+ # Load the model
+ model_name = "neuralmagic/Llama-2-7b-chat-quantized.w4a16"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ # Create a text-generation pipeline
+ small_pipeline = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     trust_remote_code=True,
+     device_map="auto",
+     max_new_tokens=250,
+     temperature=0.2,
+     top_p=0.9,
+     do_sample=True,
+     repetition_penalty=1.1,
+     pad_token_id=tokenizer.eos_token_id
+ )
+
+ llm = HuggingFacePipeline(pipeline=small_pipeline)
+ memory = ConversationBufferMemory()
+ retrieval_qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
+
+
+ def generate_prompt(user_query, schema_info):
+     # Retrieve context about similar past requests via the QA chain
+     retrieved_docs = retrieval_qa.run(user_query)
+
+     # Fetch the code attached to the most similar historical request
+     similar_docs = retriever.get_relevant_documents(user_query)
+     similar_code = ""
+     if similar_docs:
+         idx = similar_docs[0].metadata.get('index')
+         if idx is not None:
+             similar_code = history_df.iloc[idx]['code']
+
+     messages = [
+         {"role": "system", "content": f"""
+ You are an expert data analyst. Your response MUST:
+ - Return ONLY valid Python Pandas code (no text, no introductions, no explanations, no extra comments).
+ - ⚠️ Start IMMEDIATELY with the Python code block.
+ - ⚡ Use proper parentheses when using logical operators (&, |) in Pandas conditions.
+ - Always include necessary import statements.
+ - ⚡ Do NOT add ANY extra lines, comments, or explanations.
+ {f"- Reference similar code: {similar_code}" if similar_code else ""}
+ """},
+         {"role": "user", "content": f"""
+ Dataset Schema:
+ {schema_info}
+
+ Retrieved Context:
+ {retrieved_docs}
+
+ Query: {user_query}
+ """}
+     ]
+     prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
+     return prompt
+
+
+ def execute_generated_code(code):
+     """Execute generated code and return any Matplotlib figure as a PIL image."""
+     local_env = {}
+     output = io.StringIO()
+     plt.close('all')
+     with contextlib.redirect_stdout(output), contextlib.redirect_stderr(output):
+         try:
+             exec(code, globals(), local_env)
+             if plt.get_fignums():
+                 buf = io.BytesIO()
+                 plt.savefig(buf, format='png')
+                 buf.seek(0)
+                 img = Image.open(buf)
+                 return img
+             return None
+         except Exception:
+             # Execution errors are swallowed; the UI simply shows no plot
+             return None
+
+
+ def process_query(user_query):
+     """Generate Pandas code for a query and strip Markdown code fences."""
+     prompt = generate_prompt(user_query, schema_info)
+     llm_chain = RunnableLambda(lambda x: llm(x["user_query"]))
+     response = llm_chain.invoke({"user_query": prompt})
+     generated_code = response.strip()
+     if "```python" in generated_code:
+         generated_code = generated_code.split("```python")[1].split("```", 1)[0].strip()
+     elif "```" in generated_code:
+         generated_code = generated_code.split("```", 1)[1].split("```", 1)[0].strip()
+     return generated_code
+
+
+ def gradio_chat_interface(history, query):
+     # Show a placeholder reply while the code is being generated
+     history.append((query, "⏳ **Processing...**"))
+     yield history, None, ""
+     generated_code = process_query(query)
+     # Save the generated code so it can be inspected later
+     with open('/content/generated_code.py', 'w') as f:
+         f.write(generated_code)
+     image = execute_generated_code(generated_code)
+     history[-1] = (query, f"```python\n{generated_code}\n```")
+     yield history, image, ""
+
+ with gr.Blocks() as demo:
+     gr.Markdown("""
+     # **Interactive Pandas Chat with InsightAI** 💬
+     **Talk to your data, get instant answers!**
+
+     <div style="text-align: center;">
+         <table style="margin: 0 auto;">
+             <tr>
+                 <td>🔍 <strong>Explore your dataset!</strong></td>
+                 <td>💻 <strong>Instantly view generated Pandas code.</strong></td>
+             </tr>
+             <tr>
+                 <td>📊 <strong>Get accurate responses with RAG-enhanced retrieval.</strong></td>
+                 <td>📈 <strong>Live visualizations update on the right.</strong></td>
+             </tr>
+         </table>
+     </div>
+     """)
+     with gr.Row():
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(label="Chat with RAG & Historical Context Expert")
+             query_input = gr.Textbox(placeholder="Type your query and press Enter...", label="Your Query")
+
+         with gr.Column(scale=2):
+             plot_output = gr.Image(label="📊 Visualization", height=500)
+
+     query_input.submit(
+         fn=gradio_chat_interface,
+         inputs=[chatbot, query_input],
+         outputs=[chatbot, plot_output, query_input]
+     )
+
+ demo.launch()