BramLeo committed on
Commit
5b13161
·
verified ·
1 Parent(s): 384b6d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -112
app.py CHANGED
@@ -1,20 +1,10 @@
1
  import gradio as gr
2
  import gspread
3
- import time
 
4
  from oauth2client.service_account import ServiceAccountCredentials
5
- from llama_cpp import Llama
6
- from llama_index.core import VectorStoreIndex, Settings
7
- from llama_index.core.node_parser import SentenceSplitter
8
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
9
- from llama_index.llms.llama_cpp import LlamaCPP
10
- from huggingface_hub import hf_hub_download
11
- from llama_index.core.llms import ChatMessage
12
- from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
13
- from llama_index.core.schema import Document
14
-
15
- # ===================================
16
- # 1️⃣ Cache Data Google Sheets
17
- # ===================================
18
  cached_text_data = None
19
 
20
  def read_google_sheets():
@@ -26,13 +16,13 @@ def read_google_sheets():
26
  scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
27
  creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
28
  client = gspread.authorize(creds)
29
-
30
  SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
31
  sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]
32
 
33
  all_data = []
34
  spreadsheet = client.open_by_key(SPREADSHEET_ID)
35
-
36
  for sheet_name in sheet_names:
37
  try:
38
  sheet = spreadsheet.worksheet(sheet_name)
@@ -41,119 +31,81 @@ def read_google_sheets():
41
  all_data.extend([" | ".join(row) for row in data])
42
  all_data.append("\n")
43
  except gspread.exceptions.WorksheetNotFound:
44
- all_data.append(f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan.")
45
 
46
  cached_text_data = "\n".join(all_data).strip()
47
  return cached_text_data
48
-
49
- except gspread.exceptions.SpreadsheetNotFound:
50
- return "❌ ERROR: Spreadsheet tidak ditemukan!"
51
-
52
  except Exception as e:
53
  return f"❌ ERROR: {str(e)}"
54
 
55
- # ===================================
56
- # 2️⃣ Inisialisasi Model Llama
57
- # ===================================
58
- def initialize_llama_model():
59
- model_path = hf_hub_download(
60
- repo_id="TheBloke/zephyr-7b-beta-GGUF",
61
- filename="zephyr-7b-beta.Q4_K_M.gguf",
62
- cache_dir="./models"
 
63
  )
64
- return model_path
65
-
66
- # ===================================
67
- # 3️⃣ Inisialisasi Pengaturan Model
68
- # ===================================
69
- def initialize_settings(model_path):
70
- Settings.llm = LlamaCPP(
71
- model_path=model_path,
72
- temperature=0.7,
73
- context_window=4096,
74
  max_new_tokens=512,
75
- # n_gpu_layers=20, # ❌ Hapus jika error
76
- model_kwargs={"n_ctx": 4096}
 
77
  )
 
78
 
79
- # ===================================
80
- # 4️⃣ Inisialisasi Index & Chat Engine
81
- # ===================================
82
- def initialize_index():
83
- text_data = read_google_sheets()
84
- document = Document(text=text_data)
85
- parser = SentenceSplitter(chunk_size=100, chunk_overlap=30)
86
- nodes = parser.get_nodes_from_documents([document])
87
-
88
- embedding = HuggingFaceEmbedding("sentence-transformers/all-MiniLM-L6-v2") # ✅ Lebih ringan
89
- Settings.embed_model = embedding
90
-
91
- index = VectorStoreIndex(nodes)
92
- return index
93
-
94
- def initialize_chat_engine(index):
95
- retriever = index.as_retriever(similarity_top_k=1) # ✅ Kurangi ke 1 untuk kecepatan
96
- chat_engine = CondensePlusContextChatEngine.from_defaults(
97
- retriever=retriever,
98
- verbose=False
99
- )
100
- return chat_engine
101
-
102
- # ===================================
103
- # 5️⃣ Fungsi untuk Merapikan Jawaban Chatbot
104
- # ===================================
105
- def clean_response(response):
106
- text = "".join(response.response_gen)
107
- text = text.replace("\n\n", "\n").strip()
108
- text = text.replace("user:", "").replace("jawaban:", "").replace("assistant:", "").strip()
109
- return text
110
-
111
- # ===================================
112
- # 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
113
- # ===================================
114
- def generate_response(message, history, chat_engine):
115
- if history is None:
116
- history = []
117
-
118
- chat_messages = [
119
- ChatMessage(
120
- role="system",
121
- content=(
122
- "Anda adalah chatbot HRD yang membantu karyawan memahami administrasi perusahaan. "
123
- "Jangan menjawab menggunakan Bahasa Inggris. "
124
- "Gunakan Bahasa Indonesia dengan gaya profesional dan ramah. "
125
- "Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu. "
126
- "Jawaban harus singkat, jelas, dan sesuai konteks. "
127
- "Jangan memberikan jawaban untuk pertanyaan yang tidak diajukan oleh pengguna. "
128
- "Jangan menyertakan rekomendasi pertanyaan lain."
129
- ),
130
- ),
131
- ]
132
-
133
- response = chat_engine.chat(message) # GANTI: pakai .chat() bukan .stream_chat()
134
- cleaned_text = response.response.strip() # GANTI: langsung ambil response
135
-
136
- history.append((message, cleaned_text))
137
- return cleaned_text
138
-
139
- # ===================================
140
- # 7️⃣ Fungsi Utama untuk Menjalankan Aplikasi
141
- # ===================================
142
  def main():
143
- model_path = initialize_llama_model()
144
- initialize_settings(model_path)
145
-
146
- index = initialize_index()
147
- chat_engine = initialize_chat_engine(index)
148
-
149
  def chatbot_response(message, history):
150
- return generate_response(message, history, chat_engine)
151
 
152
  gr.Interface(
153
  fn=chatbot_response,
154
  inputs=["text"],
155
  outputs=["text"],
156
- ).launch()
 
 
157
 
158
  if __name__ == "__main__":
159
  main()
 
1
  import gradio as gr
2
  import gspread
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
  from oauth2client.service_account import ServiceAccountCredentials
6
+
7
+ # =============== 1. Cache Google Sheets ===============
 
 
 
 
 
 
 
 
 
 
 
8
  cached_text_data = None
9
 
10
  def read_google_sheets():
 
16
  scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
17
  creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
18
  client = gspread.authorize(creds)
19
+
20
  SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
21
  sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]
22
 
23
  all_data = []
24
  spreadsheet = client.open_by_key(SPREADSHEET_ID)
25
+
26
  for sheet_name in sheet_names:
27
  try:
28
  sheet = spreadsheet.worksheet(sheet_name)
 
31
  all_data.extend([" | ".join(row) for row in data])
32
  all_data.append("\n")
33
  except gspread.exceptions.WorksheetNotFound:
34
+ all_data.append(f"❌ Worksheet {sheet_name} tidak ditemukan.")
35
 
36
  cached_text_data = "\n".join(all_data).strip()
37
  return cached_text_data
38
+
 
 
 
39
  except Exception as e:
40
  return f"❌ ERROR: {str(e)}"
41
 
42
+
43
+ # =============== 2. Load Model Transformers ===============
44
def load_model():
    """Build the text-generation pipeline used to answer questions.

    Loads the tokenizer and causal-LM weights for ``model_id`` and wraps
    them in a sampling ``text-generation`` pipeline.

    Returns:
        The ``transformers`` pipeline. Calling it with a prompt string
        yields a list of dicts that carry a ``"generated_text"`` entry.
    """
    model_id = "mistralai/Mistral-7B-Instruct-v0.2"  # Zephyr can be swapped in here

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",  # let accelerate place the weights (GPU when available)
        # NOTE(review): float16 is kept as in the original; confirm it is
        # acceptable on CPU-only hosts, where half precision may be slow.
        torch_dtype=torch.float16,
    )

    # No `device=` argument on purpose: the model has already been placed via
    # `device_map`, and the pipeline cannot move such a model to a specific
    # device afterwards (it asks for the `device` argument to be dropped).
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        repetition_penalty=1.2,
        do_sample=True,
    )
63
 
64
+
65
+ # =============== 3. Buat Prompt dan Jawaban ===============
66
def generate_prompt(user_message, context_data):
    """Assemble the sectioned prompt sent to the language model.

    The prompt consists of four ``###`` sections (system instructions, sheet
    data, the user's question, and an empty answer slot) separated by blank
    lines. It is built from explicit lines so its layout does not depend on
    how a multi-line literal happens to be indented in the source file.

    Args:
        user_message: The question typed by the user. ``None`` is treated as
            an empty question; surrounding whitespace is removed.
        context_data: Text to place in the data section. ``None`` is treated
            as empty; surrounding whitespace is removed.

    Returns:
        The prompt string, starting with ``### SISTEM:`` and ending with
        ``### JAWABAN:`` so the model's continuation is the answer.
    """
    system_text = (
        "Anda adalah asisten HRD yang membantu karyawan memahami administrasi "
        "perusahaan. Jawablah dengan Bahasa Indonesia yang profesional dan "
        "ramah. Jika tidak tahu, katakan tidak tahu dengan sopan. Jangan jawab "
        "dalam bahasa Inggris."
    )
    # Normalise inputs so stray whitespace (or a literal "None") never ends up
    # between the section markers that the response parser relies on.
    data = "" if context_data is None else str(context_data).strip()
    question = "" if user_message is None else str(user_message).strip()

    sections = [
        "### SISTEM:",
        system_text,
        "",
        "### DATA:",
        data,
        "",
        "### PERTANYAAN:",
        question,
        "",
        "### JAWABAN:",
    ]
    return "\n".join(sections)
80
+
81
+
82
+ # =============== 4. Generate Response ===============
83
def generate_response(message, history, pipe):
    """Answer one user message using the sheet data as context.

    Reads the sheet text, builds the prompt, runs the pipeline and extracts
    only the answer that directly follows the prompt.

    Args:
        message: The user's question.
        history: Optional list of ``(message, answer)`` tuples. When a list
            is supplied the new exchange is appended to it in place.
        pipe: Callable returning ``[{"generated_text": str}, ...]`` for a
            prompt string.

    Returns:
        The cleaned answer text.
    """
    answer_marker = "### JAWABAN:"
    section_markers = ("### SISTEM:", "### DATA:", "### PERTANYAAN:", answer_marker)

    context = read_google_sheets()
    full_prompt = generate_prompt(message, context)
    generated = pipe(full_prompt)[0]["generated_text"]

    # Isolate the continuation that follows the prompt. Taking the text after
    # the *last* answer marker would return a hallucinated follow-up turn
    # whenever the model keeps writing new question/answer sections.
    if generated.startswith(full_prompt):
        answer = generated[len(full_prompt):]
    else:
        # Prompt not echoed verbatim: skip as many answer markers as the
        # prompt itself contains and keep what comes after them.
        answer = generated.split(answer_marker, full_prompt.count(answer_marker))[-1]

    # Stop at the first section header the model invents after its answer.
    cut = len(answer)
    for marker in section_markers:
        position = answer.find(marker)
        if position != -1:
            cut = min(cut, position)
    cleaned = answer[:cut].strip()

    if not cleaned:
        # Nothing usable before the first header: fall back to the text after
        # the last answer marker rather than returning an empty reply.
        cleaned = generated.split(answer_marker)[-1].strip()

    # `is None` (not truthiness) so a caller-supplied empty list is still
    # updated instead of being replaced by a throwaway list.
    if history is None:
        history = []
    history.append((message, cleaned))
    return cleaned
93
+
94
+
95
+ # =============== 5. Jalankan Gradio ===============
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
def main():
    """Load the model once and serve the chatbot through a Gradio interface."""
    pipe = load_model()

    def chatbot_response(message, history=None):
        # gr.Interface calls `fn` with one argument per input component. Only
        # a single text input is declared below, so `history` has to be
        # optional or every request fails with a missing-argument TypeError.
        return generate_response(message, history, pipe)

    gr.Interface(
        fn=chatbot_response,
        inputs=["text"],
        outputs=["text"],
        title="Chatbot HRD - Transformers",
        # NOTE(review): confirm "compact" is still a recognised theme name in
        # the installed Gradio version.
        theme="compact",
    ).launch(share=True)


if __name__ == "__main__":
    main()