alishafique committed (verified)
Commit e60808c
1 Parent(s): 62c7530

Update app.py

Files changed (1):
  app.py +16 -27
app.py CHANGED
@@ -24,11 +24,23 @@ llama = Llama(
 )
 
 
+
+
 # Function to generate responses
 def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    # chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
-    chat_prompt = f"{system_prompt}\n ### Instruction: {message}\n ### Response:"
-    response = llama(chat_prompt, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)
+
+    prompt_messages = system_prompt
+
+    # Add history and the current message
+    for user, bot in history:
+        prompt_messages = prompt_messages + f"\n### Instruction: {user}\n### Response: {bot}"
+
+
+    prompt_messages = prompt_messages + f"\n### Instruction: {message}\n### Response: "
+
+    print(prompt_messages)
+
+    response = llama(prompt_messages, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)
 
     text = ""
     for chunk in response:
@@ -37,29 +49,6 @@ def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
         text += content
         yield text
 
-# def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-#     """Generates a streaming response from the Llama model."""
-#     messages = [
-#         {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
-#     ]
-
-#     # Add history and the current message
-#     #for user, bot in history:
-#         #messages.append({"role": "user", "content": user})
-#         #messages.append({"role": "assistant", "content": bot})
-
-#     messages.append({"role": "user", "content": message})
-
-#     response = llama.create_chat_completion(
-#         messages=messages,
-#         stream=True,
-#     )
-
-#     partial_message = ""
-#     for part in response:
-#         content = part["choices"][0]["delta"].get("content", "")
-#         partial_message += content
-#         yield partial_message
 
 
 # JavaScript function for `on_load`
@@ -98,7 +87,7 @@ with gr.Blocks(js=on_load, theme=gr.themes.Default()) as demo:
     additional_inputs=[
         gr.Textbox(value="You are an Urdu Chatbot. Write an appropriate response for the given instruction in Urdu.", label="System prompt", render=False),
         gr.Slider(0, 1, 0.8, label="Temperature", render=False),
-        gr.Slider(128, 4096, 512, label="Max new tokens", render=False),
+        gr.Slider(128, 4096, 1024, label="Max new tokens", render=False),
         gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
         gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
         gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
 
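
For context: the new code replaces the single-turn prompt with an Alpaca-style "### Instruction / ### Response" transcript that replays the chat history on every turn. A minimal standalone sketch of that format (build_prompt is a hypothetical name, not in the commit; the template strings mirror the added lines):

    def build_prompt(message, history, system_prompt):
        # Start from the system prompt, replay prior (user, bot) turns,
        # then leave an open Response slot for the model to complete.
        prompt = system_prompt
        for user, bot in history:
            prompt += f"\n### Instruction: {user}\n### Response: {bot}"
        prompt += f"\n### Instruction: {message}\n### Response: "
        return prompt

    print(build_prompt("How are you?", [("Hello", "Hi!")], "You are an Urdu Chatbot."))
    # You are an Urdu Chatbot.
    # ### Instruction: Hello
    # ### Response: Hi!
    # ### Instruction: How are you?
    # ### Response: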
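
The diff view cuts off the body of the streaming loop between the two hunks. In llama-cpp-python, a completion called with stream=True yields chunks whose text delta lives under choices[0]["text"], so the elided extraction line presumably has this shape (a sketch of the library's stream format, not the committed line verbatim):

    def stream_text(response):
        # Accumulate streamed completion chunks into a growing string and
        # yield the running total, as Gradio's streaming convention expects.
        text = ""
        for chunk in response:
            text += chunk["choices"][0]["text"]  # per-chunk text delta
            yield text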