Update app.py
app.py
CHANGED
@@ -1,4 +1,4 @@
-from
+from app import FastLanguageModel
 import torch
 import gradio as gr
 
@@ -6,20 +6,17 @@ max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
 dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
 load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
 alpaca_prompt = """Berikut adalah instruksi yang deskripsikan tugas dan sepasang input dan konteksnya. Tulis response sesuai dengan permintaan.
-
 ### Instruction:
 {}
-
 ### Input:
 {}
-
 ### Response:
 {}"""
 
 if True:
-    from
+    from app import FastLanguageModel
     model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name = "abdfajar707/
+        model_name = "abdfajar707/llama3_8B_lora_model_rkp_v2", # YOUR MODEL YOU USED FOR TRAINING
         max_seq_length = max_seq_length,
         dtype = dtype,
         load_in_4bit = load_in_4bit,
@@ -42,37 +39,26 @@ def generate_response(prompt, max_length=1000):
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
-
-def
-
-
-    # Update the conversation history
-    history.append(("User", user_input))
-    history.append(("Bot", response))
-    return history, history
+history = []
+def wrapper_chat_history(chat_history, history):
+    chat_history = history[1:]
+    return chat_history
 
-
-
-
-
-
+def converse(message, chat_history):
+    response = generate_response(message)
+    print(response)
+    user_msg = {"role": "user", "content": message}
+    history.append(user_msg)
+    ai_msg = {"role": "assistant", "content": response}
+    history.append(ai_msg)
+    return history[-1]["content"]
+
+with gr.Blocks() as interface:
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.HTML('<img src="https://datahub.data.go.id/data/static/Kementerian%20PPN%20Bappenas%20Tanpa%20Teks.png" width="100px" alt="Image" style="max-width: 100%;">')
+    with gr.Row():
+        with gr.Column():
+            gr.ChatInterface(fn=converse, title="PPN/Bappenas - AI Interlinked")
 
-outputs = [
-    gr.Chatbot(label="Respons Chatbot"),
-    gr.State()  # To update the conversation history
-]
-
-# Create and launch the Gradio interface
-interface = gr.Interface(
-    fn=chatbot_interface,
-    inputs=inputs,
-    outputs=outputs,
-    title="LLaMA3 LoRA Chatbot",
-    description="Chatbot yang didukung oleh model LLaMA3 dengan modifikasi LoRA."
-)
-
-# Run the interface
 interface.launch()
-
-#demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-#demo.launch()
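One detail of the committed version is worth flagging: `from app import FastLanguageModel` imports from app.py itself, which re-executes the module on import and then fails with an ImportError, since no such class is defined in this file. `FastLanguageModel` is the loader class from the unsloth package, so the load step presumably was meant to look like the following sketch, reusing the same model name and options as the diff (the `for_inference` call is unsloth's switch to its faster generation path):

# Sketch of the load step, assuming FastLanguageModel is meant to come from
# the unsloth package rather than from app.py itself (a circular import).
from unsloth import FastLanguageModel

max_seq_length = 2048  # unsloth applies RoPE scaling internally
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "abdfajar707/llama3_8B_lora_model_rkp_v2",
    max_seq_length = max_seq_length,
    dtype = None,         # None = auto-detect: float16 on T4/V100, bfloat16 on Ampere+
    load_in_4bit = True,  # 4-bit quantization to reduce memory usage
)
FastLanguageModel.for_inference(model)  # enable unsloth's faster inference mode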
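The hunk shows only the tail of generate_response; the rest of its body lies outside the diff context. Judging from the alpaca_prompt template and the decode call, it presumably formats the instruction into the template, tokenizes, and generates. A hypothetical reconstruction, in which every line except the last two is an assumption:

def generate_response(prompt, max_length=1000):
    # Fill the template: instruction, empty input, empty response slot (assumed).
    text = alpaca_prompt.format(prompt, "", "")
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_length)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response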
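In the new UI code, gr.ChatInterface keeps its own per-session chat history and only requires fn(message, history) to return the reply string, which converse satisfies. The module-level history list is therefore shared across all sessions, and wrapper_chat_history is defined but never attached to any component. A self-contained sketch of the same Blocks layout with a stub responder, runnable without the model:

import gradio as gr

# Stub in place of the LLaMA3 model; ChatInterface only needs
# fn(message, history) -> str.
def converse(message, chat_history):
    return f"Echo: {message}"

with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML('<img src="..." width="100px">')  # logo placeholder, URL elided
    with gr.Row():
        with gr.Column():
            gr.ChatInterface(fn=converse, title="PPN/Bappenas - AI Interlinked")

interface.launch()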