alishafique committed on
Commit
a12cddf
·
verified ·
1 Parent(s): e60808c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -18
app.py CHANGED
@@ -24,23 +24,11 @@ llama = Llama(
24
  )
25
 
26
 
27
-
28
-
29
  # Function to generate responses
30
  def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
31
-
32
- prompt_messages = system_prompt
33
-
34
- # Add history and the current message
35
- for user, bot in history:
36
- prompt_messages = prompt_messages + f"\n### Instruction: {user}\n### Response: {bot}"
37
-
38
-
39
- prompt_messages = prompt_messages + f"\n### Instruction: {message}\n### Response: "
40
-
41
- print(prompt_messages)
42
-
43
- response = llama(prompt_messages, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)
44
 
45
  text = ""
46
  for chunk in response:
@@ -49,6 +37,29 @@ def generate_response(message, history, system_prompt, temperature, max_new_toke
49
  text += content
50
  yield text
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
 
54
  # JavaScript function for `on_load`
@@ -85,13 +96,13 @@ with gr.Blocks(js=on_load, theme=gr.themes.Default()) as demo:
85
  ],
86
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
87
  additional_inputs=[
88
- gr.Textbox(value="You are an Urdu Chatbot. Write an appropriate response for the given instruction in Urdu.", label="System prompt", render=False),
89
  gr.Slider(0, 1, 0.8, label="Temperature", render=False),
90
- gr.Slider(128, 4096, 1024, label="Max new tokens", render=False),
91
  gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
92
  gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
93
  gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
94
  ],
95
  )
96
 
97
- demo.queue(max_size=10).launch(share=True)
 
24
  )
25
 
26
 
 
 
27
  # Function to generate responses
28
  def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
29
+ # chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
30
+ chat_prompt = f"{system_prompt}\n ### Instruction: {message}\n ### Response:"
31
+ response = llama(chat_prompt, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)
 
 
 
 
 
 
 
 
 
 
32
 
33
  text = ""
34
  for chunk in response:
 
37
  text += content
38
  yield text
39
 
40
+ # def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
41
+ # """Generates a streaming response from the Llama model."""
42
+ # messages = [
43
+ # {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
44
+ # ]
45
+
46
+ # # Add history and the current message
47
+ # #for user, bot in history:
48
+ # #messages.append({"role": "user", "content": user})
49
+ # #messages.append({"role": "assistant", "content": bot})
50
+
51
+ # messages.append({"role": "user", "content": message})
52
+
53
+ # response = llama.create_chat_completion(
54
+ # messages=messages,
55
+ # stream=True,
56
+ # )
57
+
58
+ # partial_message = ""
59
+ # for part in response:
60
+ # content = part["choices"][0]["delta"].get("content", "")
61
+ # partial_message += content
62
+ # yield partial_message
63
 
64
 
65
  # JavaScript function for `on_load`
 
96
  ],
97
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
98
  additional_inputs=[
99
+ gr.Textbox(value="You are an Urdu Chatbot. Write an appropriate response for the given instruction in Urdu. Your response should be extremely comprehensive", label="System prompt", render=False),
100
  gr.Slider(0, 1, 0.8, label="Temperature", render=False),
101
+ gr.Slider(128, 4096, 2048, label="Max new tokens", render=False),
102
  gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
103
  gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
104
  gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
105
  ],
106
  )
107
 
108
+ demo.queue(max_size=10).launch(share=True)