abdfajar707 committed · verified · Commit 408f80c · 1 Parent(s): 1faccb8

Update app.py

Files changed (1):
  1. app.py +123 -45

app.py CHANGED
@@ -1,60 +1,145 @@
  from unsloth import FastLanguageModel
  import torch
  import gradio as gr

  # declarations
  max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
  dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
  load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
- alpaca_prompt = """Berikut adalah instruksi yang deskripsikan tugas dan sepasang input dan konteksnya. Tulis response sesuai dengan permintaan.
  ### Instruction:
  {}
  ### Input:
  {}
  ### Response:
- {}"""

  if True:
-     from app import FastLanguageModel
      model, tokenizer = FastLanguageModel.from_pretrained(
-         model_name = "abdfajar707/llama3_8B_lora_model_rkp_pn2025_v2", # YOUR MODEL YOU USED FOR TRAINING
          max_seq_length = max_seq_length,
          dtype = dtype,
          load_in_4bit = load_in_4bit,
-         temperature = 0.1,
      )
      FastLanguageModel.for_inference(model) # Enable native 2x faster inference

- # Function to generate a response
- def generate_response(prompt, max_length=1024):
-     inputs = tokenizer(
-         [
-             alpaca_prompt.format(
-                 prompt, # instruction
-                 "", # input
-                 "", # output - leave this blank for generation!
-             )
-         ], return_tensors = "pt").to("cuda")
-     outputs = model.generate(**inputs, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return response
-
- history = []
- def wrapper_chat_history(chat_history, history):
-     chat_history = history[1:]
-     return chat_history
-
- def converse(message, chat_history):
-     response = generate_response(message)
-     print(response)
-     user_msg = {"role": "user", "content": message}
-     history.append(user_msg)
-     ai_msg = {"role": "assistant", "content": response}
-     history.append(ai_msg)
-     return history[-1]["content"]
-
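Editor's note: the removed path capped total sequence length with max_length, while the replacement below budgets only new tokens via max_new_tokens; with a long prompt the former can leave almost no room for the answer. A sketch with hypothetical sizes:

```python
# Sketch, hypothetical sizes: max_length bounds prompt + completion,
# while max_new_tokens bounds the completion alone.
prompt_len = 1000
room_under_max_length = 1024 - prompt_len  # only 24 tokens left for the answer
room_under_max_new_tokens = 1024           # always a full 1024-token budget
```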

  DESCRIPTION = '''
  <div style="padding: 5px; text-align: left; display: flex; flex-direction: column; align-items: left;">
  <img src="https://sdgs.bappenas.go.id/repository/assets/bappenas_logo_square.png" style="width: 40%; max-width: 200px; height: auto; opacity: 0.55; ">
@@ -71,7 +156,7 @@ Dibangun dari Meta Llama 3
  PLACEHOLDER = """
  <div style="padding: 100px; text-align: center; display: flex; flex-direction: column; align-items: center;">
  <img src="https://cdn3.iconfinder.com/data/icons/human-resources-flat-3/48/150-4096.png" style="width: 1000; max-width: 200px; height: auto; opacity: 0.55; ">
- <h2 style="font-size: 20px; margin-bottom: 2px; opacity: 0.55;">Perencana-GPT</h2>
  <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Silakan mulai tanya...</p>
  </div>
  """
@@ -89,19 +174,12 @@ h1 {
  border-radius: 100vh;
  }
  """
- # Gradio block
  chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Interlinked System ChatInterface')

- with gr.Blocks(fill_height=True, css=css) as demo:
-
      gr.Markdown(DESCRIPTION)
-     gr.ChatInterface(
-         fn=converse,
-         chatbot=chatbot,
-         fill_height=True,
-     )
-
-     gr.Markdown(LICENSE)
-
  if __name__ == "__main__":
-     demo.launch()
 
  from unsloth import FastLanguageModel
  import torch
  import gradio as gr
+ import os
+ from threading import Thread
+ from typing import Iterator
+ from transformers import (
+     AutoModelForCausalLM,
+     BitsAndBytesConfig,
+     GemmaTokenizerFast,
+     TextIteratorStreamer,
+ )
+

  # declarations
  max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
  dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
  load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
+ #alpaca_prompt = """Berikut adalah instruksi yang deskripsikan tugas dan sepasang input dan konteksnya. Tulis response sesuai dengan permintaan.
  ### Instruction:
  {}
  ### Input:
  {}
  ### Response:
+ #{}"""

  if True:
+     from unsloth import FastLanguageModel
      model, tokenizer = FastLanguageModel.from_pretrained(
+         model_name = "abdfajar707/llama3_8B_lora_model_rkp_pn2025_v3", # YOUR MODEL YOU USED FOR TRAINING
          max_seq_length = max_seq_length,
          dtype = dtype,
          load_in_4bit = load_in_4bit,
      )
      FastLanguageModel.for_inference(model) # Enable native 2x faster inference
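Editor's note: AutoModelForCausalLM and BitsAndBytesConfig are imported above but unused; the model is still loaded through Unsloth. For reference, a plain-transformers 4-bit load of the same checkpoint would look roughly like this (a sketch, not the path this app takes, and it assumes the repo holds merged weights rather than a bare LoRA adapter):

```python
# Sketch only: the equivalent 4-bit load via vanilla transformers,
# reusing torch, AutoModelForCausalLM and BitsAndBytesConfig imported above.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
hf_model = AutoModelForCausalLM.from_pretrained(
    "abdfajar707/llama3_8B_lora_model_rkp_pn2025_v3",  # same checkpoint as above
    quantization_config=bnb_config,
    device_map="auto",
)
```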

+ MAX_MAX_NEW_TOKENS = 2048
+ DEFAULT_MAX_NEW_TOKENS = 1024
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+ model.config.sliding_window = 4096
+ model.eval()
+
+ #@spaces.GPU(duration=90)
+ def generate(
+     message: str,
+     chat_history: list[tuple[str, str]],
+     max_new_tokens: int = 1024,
+     temperature: float = 0.6,
+     top_p: float = 0.9,
+     top_k: int = 50,
+     repetition_penalty: float = 1.2,
+ ) -> Iterator[str]:
+     conversation = []
+     for user, assistant in chat_history:
+         conversation.extend(
+             [
+                 {"role": "user", "content": user},
+                 {"role": "assistant", "content": assistant},
+             ]
+         )
+     conversation.append({"role": "user", "content": message})
+
+     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+     input_ids = input_ids.to(model.device)
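Editor's note: the trimming above slices from the left, so the oldest tokens are dropped and the newest user turn always survives. A toy illustration with hypothetical sizes:

```python
# Sketch: left-truncation keeps the most recent tokens.
import torch
ids = torch.arange(10).unsqueeze(0)  # stand-in for a 10-token conversation
print(ids[:, -4:])                   # tensor([[6, 7, 8, 9]]): the newest 4 tokens survive
```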

+     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+     generate_kwargs = dict(
+         {"input_ids": input_ids},
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=True,
+         top_p=top_p,
+         top_k=top_k,
+         temperature=temperature,
+         num_beams=1,
+         repetition_penalty=repetition_penalty,
      )
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     outputs = []
+     for text in streamer:
+         outputs.append(text)
+         yield "".join(outputs)
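Editor's note: model.generate runs on a background thread and tokens arrive through the streamer, so generate() yields the accumulated reply incrementally. A minimal consumption sketch outside Gradio, with a hypothetical prompt:

```python
# Sketch: drain the generator and keep the final accumulated reply.
reply = ""
for reply in generate("Apa yang dimaksud dengan RPJMN", chat_history=[]):
    pass  # each iteration yields the response so far
print(reply)
```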
+
+
+ chat_interface = gr.ChatInterface(
+     fn=generate,
+     chatbot=chatbot,
+     additional_inputs=[
+         gr.Slider(
+             label="Max new tokens",
+             minimum=1,
+             maximum=MAX_MAX_NEW_TOKENS,
+             step=1,
+             value=DEFAULT_MAX_NEW_TOKENS,
+         ),
+         gr.Slider(
+             label="Temperature",
+             minimum=0.1,
+             maximum=4.0,
+             step=0.1,
+             value=0.6,
+         ),
+         gr.Slider(
+             label="Top-p (nucleus sampling)",
+             minimum=0.05,
+             maximum=1.0,
+             step=0.05,
+             value=0.9,
+         ),
+         gr.Slider(
+             label="Top-k",
+             minimum=1,
+             maximum=1000,
+             step=1,
+             value=50,
+         ),
+         gr.Slider(
+             label="Repetition penalty",
+             minimum=1.0,
+             maximum=2.0,
+             step=0.05,
+             value=1.2,
+         ),
+     ],
+     stop_btn=None,
+     examples=[
+         ["Apa yang dimaksud dengan RPJMN"],
+         ["Jelaskan tentang RPJMN 2020-2024"],
+         ["Apa peran RKP 2021 dan 2022 dalam RPJMN 2020-2024"],
+         ["Apa saja program prioritas RPJMN 2020-2024"],
+     ],
+ )
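Editor's note: the five sliders are passed to generate() positionally after (message, chat_history), in the order of its remaining keyword parameters; reordering either side silently breaks the mapping. A sketch of the equivalent direct call at the slider defaults:

```python
# Sketch: what ChatInterface effectively calls when all sliders sit at their defaults.
stream = generate(
    "Apa yang dimaksud dengan RPJMN",  # message (hypothetical)
    [],                                # chat_history
    1024,                              # Max new tokens
    0.6,                               # Temperature
    0.9,                               # Top-p
    50,                                # Top-k
    1.2,                               # Repetition penalty
)
```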
+
  DESCRIPTION = '''
  <div style="padding: 5px; text-align: left; display: flex; flex-direction: column; align-items: left;">
  <img src="https://sdgs.bappenas.go.id/repository/assets/bappenas_logo_square.png" style="width: 40%; max-width: 200px; height: auto; opacity: 0.55; ">

  PLACEHOLDER = """
  <div style="padding: 100px; text-align: center; display: flex; flex-direction: column; align-items: center;">
  <img src="https://cdn3.iconfinder.com/data/icons/human-resources-flat-3/48/150-4096.png" style="width: 1000; max-width: 200px; height: auto; opacity: 0.55; ">
+ <h2 style="font-size: 20px; margin-bottom: 2px; opacity: 0.55;">Asisten Virtual Perencana</h2>
  <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Silakan mulai tanya...</p>
  </div>
  """

  border-radius: 100vh;
  }
  """

  chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Interlinked System ChatInterface')

+ with gr.Blocks(css=css, fill_height=True) as demo:
      gr.Markdown(DESCRIPTION)
+     #gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+     chat_interface.render()
+
  if __name__ == "__main__":
+     demo.queue(max_size=20).launch()
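Editor's note: queue() is what lets the streaming generator serve multiple visitors in order, and max_size=20 bounds the waiting line. If per-GPU concurrency needs tightening, Gradio 4.x also accepts a default_concurrency_limit argument (an assumption about the installed version):

```python
# Sketch, assuming Gradio 4.x: one generation at a time, up to 20 queued requests.
demo.queue(max_size=20, default_concurrency_limit=1).launch()
```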