Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
import uuid | |
import threading | |
import pandas as pd | |
import numpy as np | |
from langchain.llms import CTransformers | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
# Process-wide cache shared by every session: the loaded LLM (None until the
# first load) and the lock that serialises that first load.
MODEL_CACHE = dict(model=None, init_lock=threading.Lock())

# Root folder for per-session data; ChatBot creates "user_data/<session_id>"
# underneath it.
os.makedirs("user_data", exist_ok=True)
def initialize_model_once():
    """Return the shared TinyLlama model, loading it on the first call.

    The model is stored in ``MODEL_CACHE["model"]``. The whole check-and-load
    sequence runs while holding ``MODEL_CACHE["init_lock"]``, so concurrent
    callers cannot trigger two loads.

    Returns:
        The cached ``CTransformers`` LLM instance.
    """
    with MODEL_CACHE["init_lock"]:
        if MODEL_CACHE["model"] is None:
            MODEL_CACHE["model"] = CTransformers(
                model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                model_type="llama",
                # Generation settings are handed to the underlying
                # ctransformers model through ``config``; given as top-level
                # keyword arguments they are not fields of the LangChain
                # wrapper and do not reach the model.
                config={
                    "max_new_tokens": 512,
                    "temperature": 0.1,
                    "repetition_penalty": 1.1,
                    "context_length": 2048,
                },
            )
        return MODEL_CACHE["model"]
class ChatBot:
    """Per-session assistant that answers questions about one uploaded CSV.

    A question is answered either from stored metadata, from a hard-coded
    shortcut, or by asking the LLM for pandas code and executing that code
    against the loaded DataFrame.

    Attributes are plain instance fields:
      session_id   -- identifier supplied by the caller
      csv_info     -- dict with "filename", "rows", "columns", "column_names"
                      (None until a file has been processed)
      df           -- the loaded DataFrame (None until a file has been processed)
      chat_history -- list of (message, response) tuples
      query_chain  -- LLMChain translating questions to code (None until built)
      user_dir     -- "user_data/<session_id>", created on construction
    """

    def __init__(self, session_id):
        """Create the session state and its data directory."""
        self.session_id = session_id
        self.csv_info = None
        self.df = None
        self.chat_history = []
        # Built by process_file(). Starting at None keeps the attribute
        # defined even when building the chain fails; chat() then drops into
        # its keyword fallbacks.
        self.query_chain = None
        self.user_dir = f"user_data/{session_id}"
        os.makedirs(self.user_dir, exist_ok=True)

    def process_file(self, file):
        """Load an uploaded CSV, store a copy, and build the query translator.

        Args:
            file: Upload handed over by Gradio; either an object exposing a
                ``name`` attribute holding the path, or something whose
                ``str()`` is the path. ``None`` means nothing was uploaded.

        Returns:
            A user-facing status string (success or error description).
            Errors are reported through the return value, never raised.
        """
        if file is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            file_path = file.name if hasattr(file, 'name') else str(file)
            file_name = os.path.basename(file_path)

            # Read and persist into locals first so that a failure half-way
            # cannot leave self.df pointing at new data while self.csv_info
            # still describes the previous file (or is None).
            try:
                frame = pd.read_csv(file_path)
                user_file_path = f"{self.user_dir}/uploaded.csv"
                frame.to_csv(user_file_path, index=False)
            except Exception as e:
                return f"Error membaca CSV: {str(e)}"

            self.df = frame
            self.csv_info = {
                "filename": file_name,
                "rows": self.df.shape[0],
                "columns": self.df.shape[1],
                "column_names": self.df.columns.tolist(),
            }
            print(f"CSV verified: {self.df.shape[0]} rows, {len(self.df.columns)} columns")

            # Build the chain that turns a natural-language question into
            # pandas code.
            try:
                llm = initialize_model_once()
                query_template = """
Kamu adalah asisten data yang mengubah pertanyaan bahasa natural menjadi kode Python dengan Pandas.
Informasi tentang DataFrame:
- Nama kolom: {column_names}
- Jumlah baris: {num_rows}
- Sampel data:
{sample_data}
Pertanyaan pengguna: {question}
Buat kode Python menggunakan pandas untuk menjawab pertanyaan tersebut.
Berikan HANYA kode Python saja, tanpa penjelasan atau apapun.
Kode harus menggunakan variabel 'df' sebagai nama DataFrame.
Kode:
"""
                self.query_chain = LLMChain(
                    llm=llm,
                    prompt=PromptTemplate(
                        input_variables=["column_names", "num_rows", "sample_data", "question"],
                        template=query_template
                    )
                )
                print("Query translator created successfully")
            except Exception as e:
                return f"Error creating query translator: {str(e)}"

            file_info = f"CSV berhasil dimuat: {file_name} dengan {self.df.shape[0]} baris dan {len(self.df.columns)} kolom. Kolom: {', '.join(self.df.columns.tolist())}"
            self.chat_history.append(("System", file_info))
            return f"File CSV '{file_name}' berhasil diproses! Anda dapat mulai mengajukan pertanyaan tentang data."
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error pemrosesan file: {str(e)}"

    def execute_query(self, code):
        """Execute generated pandas code and return the value it produced.

        The code runs with ``df``, ``pd`` and ``np`` predefined.

        SECURITY: this calls ``exec`` on text that originates from the LLM
        (and, indirectly, from the user's question). There is no sandbox and
        no timeout; the code runs with the full privileges of this process.
        Do not expose this to untrusted users without real isolation.

        Args:
            code: Python source to execute.

        Returns:
            The variable ``result`` if the code defined it; otherwise the
            most recently created variable when that is not ``None``;
            otherwise the DataFrame itself.

        Raises:
            Exception: wrapping any error raised while executing ``code``.
        """
        # A single dict serves as both globals and locals. With separate
        # dicts, names such as ``df`` live only in the locals mapping and are
        # invisible inside lambdas, nested functions and (before Python 3.12)
        # comprehensions defined by the executed code.
        namespace = {"df": self.df, "pd": pd, "np": np}
        try:
            exec(code, namespace)
        except Exception as e:
            raise Exception(f"Gagal menjalankan kode: {str(e)}") from e

        if "result" in namespace:
            return namespace["result"]

        # No explicit result: take the last name the code created. exec()
        # inserts "__builtins__" into the namespace, so skip that as well.
        predefined = {"df", "pd", "np", "__builtins__"}
        created = [value for name, value in namespace.items() if name not in predefined]
        if created and created[-1] is not None:
            return created[-1]
        return self.df

    @staticmethod
    def _ensure_result_assignment(code):
        """Return ``code`` adjusted so that its answer is bound to ``result``.

        Code that already assigns ``result`` is returned unchanged. Otherwise
        the final top-level expression, if any, is turned into
        ``result = <expression>``. A lone fragment that mentions none of
        ``df``/``pd``/``np`` (for example ``head()``) is treated as a
        DataFrame method and becomes ``result = df.head()``.
        """
        import ast

        if any(line.strip().startswith("result =") for line in code.split("\n")):
            return code

        # Historical behaviour, kept for bare method fragments and for text
        # that does not parse at all.
        legacy = "result = " + code if code.startswith("df.") else "result = df." + code

        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError):
            return legacy
        if not tree.body:
            return legacy

        last_stmt = tree.body[-1]
        if not isinstance(last_stmt, ast.Expr):
            # Ends in an assignment/import/etc.; execute_query() falls back
            # to the last variable the code created.
            return code

        names = {node.id for node in ast.walk(last_stmt) if isinstance(node, ast.Name)}
        if len(tree.body) == 1 and not names & {"df", "pd", "np"}:
            return legacy

        lines = code.split("\n")
        row = last_stmt.lineno - 1
        if last_stmt.col_offset != 0 or row >= len(lines):
            return code
        lines[row] = "result = " + lines[row]
        candidate = "\n".join(lines)
        try:
            ast.parse(candidate)
        except (SyntaxError, ValueError):
            # Never hand back something less valid than what we received.
            return code
        return candidate

    def _fallback_code(self, message_lower):
        """Pick canned pandas code from keywords, or None if nothing matches."""
        if "rata-rata" in message_lower or "mean" in message_lower:
            return "result = df.describe()"
        if "jumlah" in message_lower or "count" in message_lower:
            return "result = df.count()"
        if "distribusi" in message_lower:
            col = next((c for c in self.csv_info["column_names"] if c.lower() in message_lower), None)
            if col:
                # repr() quotes the name safely even if it contains a quote.
                return f"result = df[{col!r}].value_counts()"
            return "result = df.describe()"
        return None

    @staticmethod
    def _format_result(result):
        """Render a query result as text, truncating long tabular output."""
        if isinstance(result, pd.DataFrame):
            if len(result) > 5:
                return result.head(5).to_string() + f"\n\n[Total {len(result)} baris]"
            return result.to_string()
        if isinstance(result, np.ndarray) and result.ndim == 0:
            # len() is undefined for 0-d arrays; show the scalar directly.
            return str(result)
        if isinstance(result, (pd.Series, np.ndarray)):
            if len(result) > 10:
                return str(result[:10]) + f"\n\n[Total {len(result)} item]"
            return str(result)
        if hasattr(result, "__len__") and not isinstance(result, (str, int, float)):
            text = str(result)
            if len(result) > 0:
                text += f"\n\n[Total {len(result)} item]"
            return text
        return str(result)

    def chat(self, message, history):
        """Answer one user question about the loaded CSV.

        Args:
            message: The user's question.
            history: Prior chat turns; accepted for interface compatibility
                and not used here.

        Returns:
            A user-facing reply string. Errors are reported in the reply,
            never raised.
        """
        if self.df is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            message_lower = message.lower()

            # Metadata questions are answered without running any code.
            if "nama file" in message_lower:
                return f"Nama file CSV adalah: {self.csv_info['filename']}"
            elif "nama kolom" in message_lower:
                return f"Kolom dalam CSV: {', '.join(self.csv_info['column_names'])}"
            elif "jumlah baris" in message_lower or "berapa baris" in message_lower:
                return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"

            # The canned Glucose query only makes sense when that column
            # exists; otherwise let the LLM handle the question.
            if "glucose di atas 150" in message_lower and "Glucose" in self.df.columns:
                code = "result = len(df[df['Glucose'] > 150])"
            else:
                sample_str = self.df.head(3).to_string()
                try:
                    code_response = self.query_chain.run(
                        column_names=str(self.csv_info["column_names"]),
                        num_rows=self.csv_info["rows"],
                        sample_data=sample_str,
                        question=message
                    )
                    # Strip Markdown code fences the model may have added.
                    code = code_response.strip().replace("```python", "").replace("```", "").strip()
                    code = self._ensure_result_assignment(code)
                except Exception as e:
                    # LLM unavailable or failed: try keyword-based fallbacks.
                    code = self._fallback_code(message_lower)
                    if code is None:
                        return f"Maaf, saya tidak dapat memproses pertanyaan ini. Error: {str(e)}"

            try:
                print(f"Executing code: {code}")
                result = self.execute_query(code)
                result_str = self._format_result(result)
                response = f"Hasil analisis:\n\n{result_str}\n\nKode yang dijalankan:\n```python\n{code}\n```"
                self.chat_history.append((message, response))
                return response
            except Exception as e:
                return f"Error saat menganalisis data: {str(e)}\n\nKode yang dicoba:\n```python\n{code}\n```"
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error: {str(e)}"
# UI code (unchanged from the previous version)
def create_gradio_interface():
    """Build and return the Gradio Blocks app for the CSV analyzer.

    The layout has an upload column (file picker, process button, example
    questions) and a chat column (history, question box, send/clear
    buttons). Per-browser-session state holds a generated session id and the
    ChatBot instance created when a file is processed.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """

    # --- Event handlers -------------------------------------------------
    # Names are kept as-is: they are what the event wiring below registers.

    def handle_process_file(file, sess_id):
        """Start a fresh ChatBot for the upload and reset the chat display."""
        chatbot = ChatBot(sess_id)
        status = chatbot.process_file(file)
        return chatbot, [(None, status)]

    def user_message_submitted(message, history, chatbot, sess_id):
        """Append the question as a pending turn and clear the input box."""
        return history + [(message, None)], "", chatbot, sess_id

    def bot_response(history, chatbot, sess_id):
        """Fill in the answer for the pending turn at the end of history."""
        question = history[-1][0]
        if chatbot is None:
            # Nothing uploaded yet: create the session object and ask for a file.
            chatbot = ChatBot(sess_id)
            answer = "Mohon upload file CSV terlebih dahulu."
        else:
            answer = chatbot.chat(question, history[:-1])
        history[-1] = (question, answer)
        return chatbot, history

    def handle_clear_chat(chatbot):
        """Empty both the stored history and the chat display."""
        if chatbot is not None:
            chatbot.chat_history = []
        return chatbot, []

    # --- Layout -----------------------------------------------------------
    with gr.Blocks(title="CSV Data Analyzer") as interface:
        session_id = gr.State(lambda: str(uuid.uuid4()))
        chatbot_state = gr.State(lambda: None)

        gr.HTML("<h1 style='text-align: center;'>CSV Data Analyzer</h1>")
        gr.HTML("<h3 style='text-align: center;'>Ajukan pertanyaan tentang data CSV Anda</h3>")

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(label="Upload CSV Anda", file_types=[".csv"])
                process_button = gr.Button("Proses CSV")
                with gr.Accordion("Contoh Pertanyaan", open=False):
                    gr.Markdown("""
- "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
- "Hitung nilai rata-rata setiap kolom numerik"
- "Berapa banyak data untuk setiap kelompok dalam kolom Outcome?"
- "Berapa jumlah baris dalam dataset ini?"
""")
            with gr.Column(scale=2):
                chatbot_interface = gr.Chatbot(label="Riwayat Chat", height=400)
                message_input = gr.Textbox(
                    label="Ketik pertanyaan Anda",
                    placeholder="Contoh: Berapa jumlah data yang memiliki nilai Glucose di atas 150?",
                    lines=2,
                )
                submit_button = gr.Button("Kirim")
                clear_button = gr.Button("Bersihkan Chat")

        # --- Wiring (registration order matches the original) -------------
        process_button.click(
            fn=handle_process_file,
            inputs=[file_input, session_id],
            outputs=[chatbot_state, chatbot_interface],
        )

        # The send button and pressing Enter in the textbox run the same
        # two-step pipeline: record the question, then produce the answer.
        for register in (submit_button.click, message_input.submit):
            register(
                fn=user_message_submitted,
                inputs=[message_input, chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_interface, message_input, chatbot_state, session_id],
            ).then(
                fn=bot_response,
                inputs=[chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_state, chatbot_interface],
            )

        clear_button.click(
            fn=handle_clear_chat,
            inputs=[chatbot_state],
            outputs=[chatbot_state, chatbot_interface],
        )

    return interface
# Launch the interface | |
if __name__ == "__main__":
    # Keep the app bound to the module-level name ``demo`` before launching;
    # share=True is passed straight through to Gradio's launch().
    demo = create_gradio_interface()
    demo.launch(share=True)