Spaces:

hmrizal
/

CSVBot-Llama2

Sleeping

App Files Files Community

hmrizal committed on Mar 14

Commit

71a08c8

verified ·

1 Parent(s): 57bee5c

Create app.py

Browse files

Files changed (1) hide show

app.py +252 -0

app.py ADDED Viewed

	@@ -0,0 +1,252 @@

+import gradio as gr
+import os
+import uuid
+import threading
+import pandas as pd
+import torch
+from langchain.document_loaders.csv_loader import CSVLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import CTransformers
+from langchain.chains import ConversationalRetrievalChain
+# Global model cache
+MODEL_CACHE = {
+    "model": None,
+    "init_lock": threading.Lock()
+}
+# Create directories for user data
+os.makedirs("user_data", exist_ok=True)
+def initialize_model_once():
+    """Initialize the model once and cache it"""
+    with MODEL_CACHE["init_lock"]:
+        if MODEL_CACHE["model"] is None:
+            # Path ke model local dalam repository
+            model_path = "llama-2-7b-chat.gguf"
+            MODEL_CACHE["model"] = CTransformers(
+                model=model_path,
+                model_type="llama",
+                max_new_tokens=512,
+                temperature=0.2,
+                top_p=0.9,
+                top_k=50,
+                repetition_penalty=1.2
+            )
+    return MODEL_CACHE["model"]
+class ChatBot:
+    def __init__(self, session_id):
+        self.session_id = session_id
+        self.chat_history = []
+        self.chain = None
+        self.user_dir = f"user_data/{session_id}"
+        os.makedirs(self.user_dir, exist_ok=True)
+    def process_file(self, file):
+        if file is None:
+            return "Mohon upload file CSV terlebih dahulu."
+        try:
+            # Handle file from Gradio
+            file_path = file.name if hasattr(file, 'name') else str(file)
+            # Copy to user directory
+            user_file_path = f"{self.user_dir}/uploaded.csv"
+            # For debugging
+            print(f"Processing file: {file_path}")
+            print(f"Saving to: {user_file_path}")
+            # Verify the CSV can be loaded
+            try:
+                df = pd.read_csv(file_path)
+                print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
+                # Save a copy in user directory
+                df.to_csv(user_file_path, index=False)
+            except Exception as e:
+                return f"Error membaca CSV: {str(e)}"
+            # Load document
+            try:
+                loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={
+                    'delimiter': ','})
+                data = loader.load()
+                print(f"Documents loaded: {len(data)}")
+            except Exception as e:
+                return f"Error loading documents: {str(e)}"
+            # Create vector database
+            try:
+                db_path = f"{self.user_dir}/db_faiss"
+                embeddings = HuggingFaceEmbeddings(
+                    model_name='sentence-transformers/all-MiniLM-L6-v2',
+                    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
+                )
+                db = FAISS.from_documents(data, embeddings)
+                db.save_local(db_path)
+                print(f"Vector database created at {db_path}")
+            except Exception as e:
+                return f"Error creating vector database: {str(e)}"
+            # Create LLM and chain
+            try:
+                llm = initialize_model_once()
+                self.chain = ConversationalRetrievalChain.from_llm(
+                    llm=llm,
+                    retriever=db.as_retriever(search_kwargs={"k": 4})
+                )
+                print("Chain created successfully")
+            except Exception as e:
+                return f"Error creating chain: {str(e)}"
+            # Add basic file info to chat history for context
+            file_info = f"CSV berhasil dimuat dengan {df.shape[0]} baris dan {len(df.columns)} kolom. Kolom: {', '.join(df.columns.tolist())}"
+            self.chat_history.append(("System", file_info))
+            return "File CSV berhasil diproses! Anda dapat mulai chat dengan model Llama2."
+        except Exception as e:
+            import traceback
+            print(traceback.format_exc())
+            return f"Error pemrosesan file: {str(e)}"
+    def chat(self, message, history):
+        if self.chain is None:
+            return "Mohon upload file CSV terlebih dahulu."
+        try:
+            # Process the question with the chain
+            result = self.chain({"question": message, "chat_history": self.chat_history})
+            # Update internal chat history
+            answer = result["answer"]
+            self.chat_history.append((message, answer))
+            # Return just the answer for Gradio
+            return answer
+        except Exception as e:
+            import traceback
+            print(traceback.format_exc())
+            return f"Error: {str(e)}"
+    def cleanup(self):
+        """Release resources when session ends"""
+        self.chain = None
+def create_gradio_interface():
+    with gr.Blocks(title="Chat with CSV using Llama2 🦙") as interface:
+        # Create unique session ID for each user
+        session_id = gr.State(lambda: str(uuid.uuid4()))
+        # Create user-specific chatbot instance
+        chatbot_state = gr.State(lambda: None)
+        gr.HTML("<h1 style='text-align: center;'>Chat with CSV using Llama2 🦙</h1>")
+        gr.HTML("<h3 style='text-align: center;'>Asisten analisis CSV yang powerfull</h3>")
+        with gr.Row():
+            with gr.Column(scale=1):
+                file_input = gr.File(
+                    label="Upload CSV Anda",
+                    file_types=[".csv"]
+                )
+                process_button = gr.Button("Proses CSV")
+                with gr.Accordion("Informasi Model", open=False):
+                    gr.Markdown("""
+                    **Model**: Llama-2-7b-chat
+                    **Fitur**:
+                    - Dioptimalkan untuk analisis data dan percakapan
+                    - Efisien dengan kuantisasi GGUF
+                    - Manajemen sesi per pengguna
+                    """)
+            with gr.Column(scale=2):
+                chatbot_interface = gr.Chatbot(
+                    label="Riwayat Chat",
+                    height=400
+                )
+                message_input = gr.Textbox(
+                    label="Ketik pesan Anda",
+                    placeholder="Tanyakan tentang data CSV Anda...",
+                    lines=2
+                )
+                submit_button = gr.Button("Kirim")
+                clear_button = gr.Button("Bersihkan Chat")
+        # Process file handler
+        def handle_process_file(file, sess_id):
+            # Create chatbot if doesn't exist
+            chatbot = ChatBot(sess_id)
+            result = chatbot.process_file(file)
+            return chatbot, [(None, result)]
+        process_button.click(
+            fn=handle_process_file,
+            inputs=[file_input, session_id],
+            outputs=[chatbot_state, chatbot_interface]
+        )
+        # Chat handler - show user message immediately and then start thinking
+        def user_message_submitted(message, history, chatbot, sess_id):
+            # Add user message to history immediately
+            history = history + [(message, None)]
+            return history, "", chatbot, sess_id
+        def bot_response(history, chatbot, sess_id):
+            # Create chatbot if doesn't exist
+            if chatbot is None:
+                chatbot = ChatBot(sess_id)
+                history[-1] = (history[-1][0], "Mohon upload file CSV terlebih dahulu.")
+                return chatbot, history
+            user_message = history[-1][0]
+            response = chatbot.chat(user_message, history[:-1])
+            # Update the last history item with the response
+            history[-1] = (user_message, response)
+            return chatbot, history
+        submit_button.click(
+            fn=user_message_submitted,
+            inputs=[message_input, chatbot_interface, chatbot_state, session_id],
+            outputs=[chatbot_interface, message_input, chatbot_state, session_id]
+        ).then(
+            fn=bot_response,
+            inputs=[chatbot_interface, chatbot_state, session_id],
+            outputs=[chatbot_state, chatbot_interface]
+        )
+        # Also hook up message input for pressing Enter
+        message_input.submit(
+            fn=user_message_submitted,
+            inputs=[message_input, chatbot_interface, chatbot_state, session_id],
+            outputs=[chatbot_interface, message_input, chatbot_state, session_id]
+        ).then(
+            fn=bot_response,
+            inputs=[chatbot_interface, chatbot_state, session_id],
+            outputs=[chatbot_state, chatbot_interface]
+        )
+        # Clear chat handler
+        def handle_clear_chat(chatbot):
+            if chatbot is not None:
+                chatbot.chat_history = []
+            return chatbot, []
+        clear_button.click(
+            fn=handle_clear_chat,
+            inputs=[chatbot_state],
+            outputs=[chatbot_state, chatbot_interface]
+        )
+    return interface
+# Launch the interface
+# Runs only when the file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    demo = create_gradio_interface()
+    # share=True asks Gradio for a public share link in addition to the local server
+    demo.launch(share=True)