hmrizal committed on
Commit
a61644e
·
verified ·
1 Parent(s): ed96c03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -45
app.py CHANGED
@@ -3,12 +3,12 @@ import os
3
  import uuid
4
  import threading
5
  import pandas as pd
6
- import torch
7
  from langchain.document_loaders.csv_loader import CSVLoader
8
  from langchain.embeddings import HuggingFaceEmbeddings
9
  from langchain.vectorstores import FAISS
10
- from langchain.llms import CTransformers
11
  from langchain.chains import ConversationalRetrievalChain
 
12
 
13
  # Global model cache
14
  MODEL_CACHE = {
@@ -20,20 +20,36 @@ MODEL_CACHE = {
20
  os.makedirs("user_data", exist_ok=True)
21
 
22
  def initialize_model_once():
23
- """Initialize the model once and cache it"""
24
  with MODEL_CACHE["init_lock"]:
25
  if MODEL_CACHE["model"] is None:
26
- # Path ke model local dalam repository
27
- model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
28
- MODEL_CACHE["model"] = CTransformers(
29
- model=model_path,
30
- model_type="tinyllama",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  max_new_tokens=512,
32
  temperature=0.2,
33
  top_p=0.9,
34
- top_k=50,
35
  repetition_penalty=1.2
36
  )
 
 
 
37
 
38
  return MODEL_CACHE["model"]
39
 
@@ -53,27 +69,18 @@ class ChatBot:
53
  # Handle file from Gradio
54
  file_path = file.name if hasattr(file, 'name') else str(file)
55
 
56
- # Copy to user directory
57
- user_file_path = f"{self.user_dir}/uploaded.csv"
58
-
59
- # For debugging
60
- print(f"Processing file: {file_path}")
61
- print(f"Saving to: {user_file_path}")
62
-
63
- # Verify the CSV can be loaded
64
  try:
65
  df = pd.read_csv(file_path)
66
- print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
67
-
68
- # Save a copy in user directory
69
  df.to_csv(user_file_path, index=False)
 
70
  except Exception as e:
71
  return f"Error membaca CSV: {str(e)}"
72
 
73
  # Load document
74
  try:
75
- loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={
76
- 'delimiter': ','})
77
  data = loader.load()
78
  print(f"Documents loaded: {len(data)}")
79
  except Exception as e:
@@ -84,7 +91,7 @@ class ChatBot:
84
  db_path = f"{self.user_dir}/db_faiss"
85
  embeddings = HuggingFaceEmbeddings(
86
  model_name='sentence-transformers/all-MiniLM-L6-v2',
87
- model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
88
  )
89
 
90
  db = FAISS.from_documents(data, embeddings)
@@ -104,11 +111,11 @@ class ChatBot:
104
  except Exception as e:
105
  return f"Error creating chain: {str(e)}"
106
 
107
- # Add basic file info to chat history for context
108
  file_info = f"CSV berhasil dimuat dengan {df.shape[0]} baris dan {len(df.columns)} kolom. Kolom: {', '.join(df.columns.tolist())}"
109
  self.chat_history.append(("System", file_info))
110
 
111
- return "File CSV berhasil diproses! Anda dapat mulai chat dengan model Llama2."
112
  except Exception as e:
113
  import traceback
114
  print(traceback.format_exc())
@@ -119,29 +126,23 @@ class ChatBot:
119
  return "Mohon upload file CSV terlebih dahulu."
120
 
121
  try:
122
- # Process the question with the chain
123
  result = self.chain({"question": message, "chat_history": self.chat_history})
124
 
125
- # Update internal chat history
126
  answer = result["answer"]
127
  self.chat_history.append((message, answer))
128
 
129
- # Return just the answer for Gradio
130
  return answer
131
  except Exception as e:
132
  import traceback
133
  print(traceback.format_exc())
134
  return f"Error: {str(e)}"
135
 
136
- def cleanup(self):
137
- """Release resources when session ends"""
138
- self.chain = None
139
-
140
  def create_gradio_interface():
141
  with gr.Blocks(title="Chat with CSV using Llama2 🦙") as interface:
142
- # Create unique session ID for each user
143
  session_id = gr.State(lambda: str(uuid.uuid4()))
144
- # Create user-specific chatbot instance
145
  chatbot_state = gr.State(lambda: None)
146
 
147
  gr.HTML("<h1 style='text-align: center;'>Chat with CSV using Llama2 🦙</h1>")
@@ -157,11 +158,11 @@ def create_gradio_interface():
157
 
158
  with gr.Accordion("Informasi Model", open=False):
159
  gr.Markdown("""
160
- **Model**: Llama-2-7b-chat
161
 
162
  **Fitur**:
163
  - Dioptimalkan untuk analisis data dan percakapan
164
- - Efisien dengan kuantisasi GGUF
165
  - Manajemen sesi per pengguna
166
  """)
167
 
@@ -178,9 +179,8 @@ def create_gradio_interface():
178
  submit_button = gr.Button("Kirim")
179
  clear_button = gr.Button("Bersihkan Chat")
180
 
181
- # Process file handler
182
  def handle_process_file(file, sess_id):
183
- # Create chatbot if doesn't exist
184
  chatbot = ChatBot(sess_id)
185
  result = chatbot.process_file(file)
186
  return chatbot, [(None, result)]
@@ -191,14 +191,11 @@ def create_gradio_interface():
191
  outputs=[chatbot_state, chatbot_interface]
192
  )
193
 
194
- # Chat handler - show user message immediately and then start thinking
195
  def user_message_submitted(message, history, chatbot, sess_id):
196
- # Add user message to history immediately
197
  history = history + [(message, None)]
198
  return history, "", chatbot, sess_id
199
 
200
  def bot_response(history, chatbot, sess_id):
201
- # Create chatbot if doesn't exist
202
  if chatbot is None:
203
  chatbot = ChatBot(sess_id)
204
  history[-1] = (history[-1][0], "Mohon upload file CSV terlebih dahulu.")
@@ -206,8 +203,6 @@ def create_gradio_interface():
206
 
207
  user_message = history[-1][0]
208
  response = chatbot.chat(user_message, history[:-1])
209
-
210
- # Update the last history item with the response
211
  history[-1] = (user_message, response)
212
  return chatbot, history
213
 
@@ -221,7 +216,6 @@ def create_gradio_interface():
221
  outputs=[chatbot_state, chatbot_interface]
222
  )
223
 
224
- # Also hook up message input for pressing Enter
225
  message_input.submit(
226
  fn=user_message_submitted,
227
  inputs=[message_input, chatbot_interface, chatbot_state, session_id],
@@ -232,7 +226,6 @@ def create_gradio_interface():
232
  outputs=[chatbot_state, chatbot_interface]
233
  )
234
 
235
- # Clear chat handler
236
  def handle_clear_chat(chatbot):
237
  if chatbot is not None:
238
  chatbot.chat_history = []
 
3
  import uuid
4
  import threading
5
  import pandas as pd
 
6
  from langchain.document_loaders.csv_loader import CSVLoader
7
  from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.vectorstores import FAISS
9
+ from langchain.llms import HuggingFacePipeline
10
  from langchain.chains import ConversationalRetrievalChain
11
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
12
 
13
  # Global model cache
14
  MODEL_CACHE = {
 
20
  os.makedirs("user_data", exist_ok=True)
21
 
22
  def initialize_model_once():
23
+ """Initialize model once using pipeline API"""
24
  with MODEL_CACHE["init_lock"]:
25
  if MODEL_CACHE["model"] is None:
26
+ # Load model from Hugging Face Hub
27
+ model_id = "meta-llama/Llama-2-7b-chat-hf"
28
+
29
+ # Tokenizer
30
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ.get("HF_TOKEN"))
31
+
32
+ # Model with low precision
33
+ model = AutoModelForCausalLM.from_pretrained(
34
+ model_id,
35
+ token=os.environ.get("HF_TOKEN"),
36
+ device_map="auto",
37
+ load_in_8bit=True # Quantize model to 8-bit precision
38
+ )
39
+
40
+ # Create pipeline
41
+ pipe = pipeline(
42
+ "text-generation",
43
+ model=model,
44
+ tokenizer=tokenizer,
45
  max_new_tokens=512,
46
  temperature=0.2,
47
  top_p=0.9,
 
48
  repetition_penalty=1.2
49
  )
50
+
51
+ # Create LangChain wrapper
52
+ MODEL_CACHE["model"] = HuggingFacePipeline(pipeline=pipe)
53
 
54
  return MODEL_CACHE["model"]
55
 
 
69
  # Handle file from Gradio
70
  file_path = file.name if hasattr(file, 'name') else str(file)
71
 
72
+ # Verify and save CSV
 
 
 
 
 
 
 
73
  try:
74
  df = pd.read_csv(file_path)
75
+ user_file_path = f"{self.user_dir}/uploaded.csv"
 
 
76
  df.to_csv(user_file_path, index=False)
77
+ print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
78
  except Exception as e:
79
  return f"Error membaca CSV: {str(e)}"
80
 
81
  # Load document
82
  try:
83
+ loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={'delimiter': ','})
 
84
  data = loader.load()
85
  print(f"Documents loaded: {len(data)}")
86
  except Exception as e:
 
91
  db_path = f"{self.user_dir}/db_faiss"
92
  embeddings = HuggingFaceEmbeddings(
93
  model_name='sentence-transformers/all-MiniLM-L6-v2',
94
+ model_kwargs={'device': 'auto'}
95
  )
96
 
97
  db = FAISS.from_documents(data, embeddings)
 
111
  except Exception as e:
112
  return f"Error creating chain: {str(e)}"
113
 
114
+ # Add file info to chat history
115
  file_info = f"CSV berhasil dimuat dengan {df.shape[0]} baris dan {len(df.columns)} kolom. Kolom: {', '.join(df.columns.tolist())}"
116
  self.chat_history.append(("System", file_info))
117
 
118
+ return "File CSV berhasil diproses! Anda dapat mulai chat dengan model Llama 2."
119
  except Exception as e:
120
  import traceback
121
  print(traceback.format_exc())
 
126
  return "Mohon upload file CSV terlebih dahulu."
127
 
128
  try:
129
+ # Process with the chain
130
  result = self.chain({"question": message, "chat_history": self.chat_history})
131
 
132
+ # Update chat history
133
  answer = result["answer"]
134
  self.chat_history.append((message, answer))
135
 
 
136
  return answer
137
  except Exception as e:
138
  import traceback
139
  print(traceback.format_exc())
140
  return f"Error: {str(e)}"
141
 
142
+ # UI Code dan handler functions sama seperti sebelumnya
 
 
 
143
  def create_gradio_interface():
144
  with gr.Blocks(title="Chat with CSV using Llama2 🦙") as interface:
 
145
  session_id = gr.State(lambda: str(uuid.uuid4()))
 
146
  chatbot_state = gr.State(lambda: None)
147
 
148
  gr.HTML("<h1 style='text-align: center;'>Chat with CSV using Llama2 🦙</h1>")
 
158
 
159
  with gr.Accordion("Informasi Model", open=False):
160
  gr.Markdown("""
161
+ **Model**: Llama-2-7b-chat-hf
162
 
163
  **Fitur**:
164
  - Dioptimalkan untuk analisis data dan percakapan
165
+ - Menggunakan API Hugging Face untuk efisiensi
166
  - Manajemen sesi per pengguna
167
  """)
168
 
 
179
  submit_button = gr.Button("Kirim")
180
  clear_button = gr.Button("Bersihkan Chat")
181
 
182
+ # Handler functions
183
  def handle_process_file(file, sess_id):
 
184
  chatbot = ChatBot(sess_id)
185
  result = chatbot.process_file(file)
186
  return chatbot, [(None, result)]
 
191
  outputs=[chatbot_state, chatbot_interface]
192
  )
193
 
 
194
  def user_message_submitted(message, history, chatbot, sess_id):
 
195
  history = history + [(message, None)]
196
  return history, "", chatbot, sess_id
197
 
198
  def bot_response(history, chatbot, sess_id):
 
199
  if chatbot is None:
200
  chatbot = ChatBot(sess_id)
201
  history[-1] = (history[-1][0], "Mohon upload file CSV terlebih dahulu.")
 
203
 
204
  user_message = history[-1][0]
205
  response = chatbot.chat(user_message, history[:-1])
 
 
206
  history[-1] = (user_message, response)
207
  return chatbot, history
208
 
 
216
  outputs=[chatbot_state, chatbot_interface]
217
  )
218
 
 
219
  message_input.submit(
220
  fn=user_message_submitted,
221
  inputs=[message_input, chatbot_interface, chatbot_state, session_id],
 
226
  outputs=[chatbot_state, chatbot_interface]
227
  )
228
 
 
229
  def handle_clear_chat(chatbot):
230
  if chatbot is not None:
231
  chatbot.chat_history = []