alishafique committed on
Commit a28de04 · verified · 1 Parent(s): 8f689f0

Update app.py

Files changed (1)
  1. app.py +53 -19
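
The rewritten generate_response in this commit drops llama.create_chat_completion and instead flattens the system prompt, the chat history, and the new message into a single Alpaca-style prompt. As a hypothetical walk-through (the conversation text below is invented, not taken from app.py), the string it assembles for one prior exchange plus a new message looks like this:

# Hypothetical example of the prompt the new generate_response builds.
# The user/bot content is invented for illustration only.
example_prompt = (
    "You are an Urdu Chatbot. Write an appropriate response for the given instruction.\n"
    "\n"
    "### Instruction:\n"
    "Hello!\n"
    "\n"
    "### Response:\n"
    "Hi, how can I help?\n"
    "\n"
    "### Instruction:\n"
    "Tell me a joke.\n"
    "\n"
    "### Response:\n"
)

Generation is then cut off on the "###" / "### Instruction:" stop strings, so the model only completes the final Response block.
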
app.py CHANGED
@@ -37,29 +37,63 @@ llama = Llama(
 #         text += content
 #         yield text
 
-def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    """Generates a streaming response from the Llama model."""
-    messages = [
-        {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
-    ]
+# def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+#     """Generates a streaming response from the Llama model."""
+#     messages = [
+#         {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
+#     ]
 
-    # Add history and the current message
-    for user, bot in history:
-        messages.append({"role": "user", "content": user})
-        messages.append({"role": "assistant", "content": bot})
+#     # Add history and the current message
+#     for user, bot in history:
+#         messages.append({"role": "user", "content": user})
+#         messages.append({"role": "assistant", "content": bot})
 
-    messages.append({"role": "user", "content": message})
+#     messages.append({"role": "user", "content": message})
 
-    response = llama.create_chat_completion(
-        messages=messages,
-        stream=True,
-    )
+#     response = llama.create_chat_completion(
+#         messages=messages,
+#         stream=True,
+#     )
 
-    partial_message = ""
-    for part in response:
-        content = part["choices"][0]["delta"].get("content", "")
-        partial_message += content
-        yield partial_message
+#     partial_message = ""
+#     for part in response:
+#         content = part["choices"][0]["delta"].get("content", "")
+#         partial_message += content
+#         yield partial_message
+
+
+def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+    """Generates a streaming response from the Llama model using Alpaca chat template."""
+
+    # Start with system prompt
+    chat_prompt = system_prompt or "You are an Urdu Chatbot. Write an appropriate response for the given instruction."
+    chat_prompt += "\n"
+
+    # Add history to the prompt
+    for user, bot in history:
+        chat_prompt += f"\n### Instruction:\n{user}\n\n### Response:\n{bot}\n"
+
+    # Add current message
+    chat_prompt += f"\n### Instruction:\n{message}\n\n### Response:\n"
+
+    response = llama(
+        chat_prompt,
+        temperature=temperature,
+        max_tokens=max_new_tokens,
+        top_k=top_k,
+        repeat_penalty=repetition_penalty,
+        top_p=top_p,
+        stop=["###", "### Instruction:", "\n### Instruction:", "Q:"],
+        echo=False,
+        stream=True
+    )
+
+    text = ""
+    for chunk in response:
+        content = chunk["choices"][0]["text"]
+        if content:
+            text += content
+            yield text
 
 
 # JavaScript function for `on_load`
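
generate_response keeps the (message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p) signature and yields the accumulated text on every chunk, so it can be streamed through a Gradio chat UI. The sketch below is only an assumption about how app.py wires it up; the component labels, ranges, and default values are illustrative, not taken from the file.

# Sketch (assumption, not part of this commit): streaming generate_response through
# gr.ChatInterface. History is assumed to arrive as (user, bot) string pairs, matching
# the loop in generate_response. Slider ranges and defaults are illustrative only.
import gradio as gr

demo = gr.ChatInterface(
    fn=generate_response,
    additional_inputs=[
        gr.Textbox(
            value="You are an Urdu Chatbot. Write an appropriate response for the given instruction.",
            label="System prompt",
        ),
        gr.Slider(0.0, 2.0, value=0.7, label="Temperature"),
        gr.Slider(64, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
        gr.Slider(1.0, 2.0, value=1.1, label="Repetition penalty"),
        gr.Slider(0.0, 1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Because the generator yields the full text accumulated so far rather than per-token deltas, the chat UI can simply replace the displayed message each time a new chunk arrives.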