Spaces:

hmrizal
/

CSVBot-Llama2

Sleeping

App Files Files Community

CSVBot-Llama2 / app.py

hmrizal

Update app.py

35a41ec verified 4 months ago

raw

history blame

9.5 kB

	import gradio as gr
	import os
	import uuid
	import threading
	import pandas as pd
	import torch
	from langchain.document_loaders.csv_loader import CSVLoader
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.llms import CTransformers
	from langchain.chains import ConversationalRetrievalChain

	# Process-wide cache: the loaded LLM (None until first use) and the lock
	# that serializes its one-time initialization.
	MODEL_CACHE = dict(model=None, init_lock=threading.Lock())

	# Make sure the root folder for per-session data exists.
	os.makedirs("user_data", exist_ok=True)

	def initialize_model_once():
	    """Return the shared LLM, loading it on the first call.

	    The instance is stored in ``MODEL_CACHE["model"]``; the check-and-load
	    runs while holding ``MODEL_CACHE["init_lock"]`` so that concurrent first
	    calls build the model only once.

	    Returns:
	        The cached ``CTransformers`` instance.
	    """
	    with MODEL_CACHE["init_lock"]:
	        if MODEL_CACHE["model"] is None:
	            # Path to the local model file inside the repository
	            model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"

	            # LangChain's CTransformers wrapper hands generation settings to
	            # ctransformers through ``config``; given as plain keyword
	            # arguments they are not forwarded to the model.
	            generation_config = {
	                "max_new_tokens": 512,
	                "temperature": 0.2,
	                "top_p": 0.9,
	                "top_k": 50,
	                "repetition_penalty": 1.2,
	            }

	            # NOTE(review): "tinyllama" is not among the model types that
	            # ctransformers documents (TinyLlama is Llama-architecture) —
	            # confirm whether this should be "llama".
	            MODEL_CACHE["model"] = CTransformers(
	                model=model_path,
	                model_type="tinyllama",
	                config=generation_config,
	            )

	    return MODEL_CACHE["model"]

	class ChatBot:
	    """Per-session helper that indexes one uploaded CSV and answers questions on it.

	    Each instance keeps a folder under ``user_data/`` named after the session
	    id, a list of conversation tuples (``chat_history``), and the retrieval
	    chain built by ``process_file`` (``None`` until a CSV has been processed).
	    """

	    def __init__(self, session_id):
	        """Store the session id and create the session's data folder."""
	        session_folder = f"user_data/{session_id}"
	        os.makedirs(session_folder, exist_ok=True)

	        self.session_id = session_id
	        self.user_dir = session_folder
	        self.chat_history = []
	        self.chain = None

	    def process_file(self, file):
	        """Index an uploaded CSV and build the retrieval chain for this session.

	        Args:
	            file: Upload handle. Its ``name`` attribute is used as the path
	                when present; otherwise ``str(file)`` is used.

	        Returns:
	            A user-facing status string: the success message, or a message
	            naming the stage that failed. Errors are reported, not raised.
	        """
	        if file is None:
	            return "Mohon upload file CSV terlebih dahulu."

	        try:
	            # Resolve where the upload lives on disk
	            if hasattr(file, 'name'):
	                source_path = file.name
	            else:
	                source_path = str(file)

	            # Destination of the per-session copy
	            saved_copy = f"{self.user_dir}/uploaded.csv"

	            # Debug output
	            print(f"Processing file: {source_path}")
	            print(f"Saving to: {saved_copy}")

	            # Stage 1: make sure pandas can parse the CSV, then keep a copy
	            try:
	                df = pd.read_csv(source_path)
	                print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
	                df.to_csv(saved_copy, index=False)
	            except Exception as err:
	                return f"Error membaca CSV: {err}"

	            # Stage 2: turn the CSV into documents
	            try:
	                csv_loader = CSVLoader(
	                    file_path=source_path,
	                    encoding="utf-8",
	                    csv_args={'delimiter': ','},
	                )
	                documents = csv_loader.load()
	                print(f"Documents loaded: {len(documents)}")
	            except Exception as err:
	                return f"Error loading documents: {err}"

	            # Stage 3: embed the documents and persist the FAISS index
	            try:
	                index_dir = f"{self.user_dir}/db_faiss"
	                embedder = HuggingFaceEmbeddings(
	                    model_name='sentence-transformers/all-MiniLM-L6-v2',
	                    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'},
	                )

	                vector_store = FAISS.from_documents(documents, embedder)
	                vector_store.save_local(index_dir)
	                print(f"Vector database created at {index_dir}")
	            except Exception as err:
	                return f"Error creating vector database: {err}"

	            # Stage 4: wire the shared LLM to a retriever over the new index
	            try:
	                shared_llm = initialize_model_once()
	                self.chain = ConversationalRetrievalChain.from_llm(
	                    llm=shared_llm,
	                    retriever=vector_store.as_retriever(search_kwargs={"k": 4}),
	                )
	                print("Chain created successfully")
	            except Exception as err:
	                return f"Error creating chain: {err}"

	            # Seed the conversation with a summary of the file for context
	            column_names = ', '.join(df.columns.tolist())
	            file_info = f"CSV berhasil dimuat dengan {df.shape[0]} baris dan {len(df.columns)} kolom. Kolom: {column_names}"
	            self.chat_history.append(("System", file_info))

	            return "File CSV berhasil diproses! Anda dapat mulai chat dengan model Llama2."
	        except Exception as err:
	            # Anything not caught by a stage above: log the trace, report it
	            import traceback
	            print(traceback.format_exc())
	            return f"Error pemrosesan file: {err}"

	    def chat(self, message, history):
	        """Answer ``message`` with the retrieval chain and record the exchange.

	        Args:
	            message: The user's question.
	            history: Accepted for the caller's convenience; this method does
	                not read it and relies on ``self.chat_history`` instead.

	        Returns:
	            The chain's answer, a prompt to upload a CSV when no chain exists
	            yet, or an ``"Error: ..."`` string if the chain call fails.
	        """
	        if self.chain is None:
	            return "Mohon upload file CSV terlebih dahulu."

	        try:
	            # Ask the chain, passing the internally tracked conversation
	            payload = {"question": message, "chat_history": self.chat_history}
	            answer = self.chain(payload)["answer"]

	            # Remember this turn for follow-up questions
	            self.chat_history.append((message, answer))
	            return answer
	        except Exception as err:
	            import traceback
	            print(traceback.format_exc())
	            return f"Error: {err}"

	    def cleanup(self):
	        """Drop the reference to the chain when the session ends."""
	        self.chain = None

	def create_gradio_interface():
	    """Build the Gradio Blocks UI for chatting with an uploaded CSV.

	    The layout has an upload column (file picker, process button, model info)
	    and a chat column (transcript, message box, send and clear buttons).
	    Per-session state holds a generated session id and the session's
	    ``ChatBot`` (``None`` until one is created).

	    Returns:
	        The assembled ``gr.Blocks`` object, not yet launched.
	    """
	    with gr.Blocks(title="Chat with CSV using Llama2 🦙") as interface:
	        # Unique session id for each user
	        session_id = gr.State(lambda: str(uuid.uuid4()))
	        # User-specific ChatBot instance (None until one is created)
	        chatbot_state = gr.State(lambda: None)

	        gr.HTML("<h1 style='text-align: center;'>Chat with CSV using Llama2 🦙</h1>")
	        gr.HTML("<h3 style='text-align: center;'>Asisten analisis CSV yang powerfull</h3>")

	        with gr.Row():
	            with gr.Column(scale=1):
	                file_input = gr.File(
	                    label="Upload CSV Anda",
	                    file_types=[".csv"],
	                )
	                process_button = gr.Button("Proses CSV")

	                with gr.Accordion("Informasi Model", open=False):
	                    # Name the model file this app loads
	                    # (tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf) rather than
	                    # Llama-2-7b-chat, which is not what runs here.
	                    gr.Markdown(
	                        "\n"
	                        "Model: TinyLlama-1.1B-Chat-v1.0 (Q5_K_M)\n"
	                        "\n"
	                        "Fitur:\n"
	                        "- Dioptimalkan untuk analisis data dan percakapan\n"
	                        "- Efisien dengan kuantisasi GGUF\n"
	                        "- Manajemen sesi per pengguna\n"
	                    )

	            with gr.Column(scale=2):
	                chatbot_interface = gr.Chatbot(
	                    label="Riwayat Chat",
	                    height=400,
	                )
	                message_input = gr.Textbox(
	                    label="Ketik pesan Anda",
	                    placeholder="Tanyakan tentang data CSV Anda...",
	                    lines=2,
	                )
	                submit_button = gr.Button("Kirim")
	                clear_button = gr.Button("Bersihkan Chat")

	        # Process file handler
	        def handle_process_file(file, sess_id):
	            # Every upload gets a fresh ChatBot, so a new CSV starts with a
	            # clean conversation and a new chain.
	            chatbot = ChatBot(sess_id)
	            result = chatbot.process_file(file)
	            return chatbot, [(None, result)]

	        process_button.click(
	            fn=handle_process_file,
	            inputs=[file_input, session_id],
	            outputs=[chatbot_state, chatbot_interface],
	        )

	        # Chat handler, step 1: show the user message immediately
	        def user_message_submitted(message, history, chatbot, sess_id):
	            history = list(history or [])
	            if not message or not message.strip():
	                # Blank submit: add no turn, so nothing is sent to the model
	                return history, "", chatbot, sess_id
	            return history + [(message, None)], "", chatbot, sess_id

	        # Chat handler, step 2: fill in the answer for the pending turn
	        def bot_response(history, chatbot, sess_id):
	            # No pending turn (blank submit or empty transcript): leave the
	            # transcript as is instead of re-answering the previous message.
	            if not history or history[-1][1] is not None:
	                return chatbot, history or []

	            user_message = history[-1][0]

	            if chatbot is None:
	                # No CSV processed yet in this session
	                chatbot = ChatBot(sess_id)
	                history[-1] = (user_message, "Mohon upload file CSV terlebih dahulu.")
	                return chatbot, history

	            response = chatbot.chat(user_message, history[:-1])

	            # Update the last history item with the response
	            history[-1] = (user_message, response)
	            return chatbot, history

	        # The send button and pressing Enter in the textbox share one pipeline
	        for register in (submit_button.click, message_input.submit):
	            register(
	                fn=user_message_submitted,
	                inputs=[message_input, chatbot_interface, chatbot_state, session_id],
	                outputs=[chatbot_interface, message_input, chatbot_state, session_id],
	            ).then(
	                fn=bot_response,
	                inputs=[chatbot_interface, chatbot_state, session_id],
	                outputs=[chatbot_state, chatbot_interface],
	            )

	        # Clear chat handler: reset both the bot's memory and the transcript
	        def handle_clear_chat(chatbot):
	            if chatbot is not None:
	                chatbot.chat_history = []
	            return chatbot, []

	        clear_button.click(
	            fn=handle_clear_chat,
	            inputs=[chatbot_state],
	            outputs=[chatbot_state, chatbot_interface],
	        )

	    return interface

	# Script entry point: build the UI, then serve it
	if __name__ == "__main__":
	    demo = create_gradio_interface()
	    launch_options = {"share": True}
	    demo.launch(**launch_options)