Jaward committed on
Commit a0024f7 · verified · 1 Parent(s): 4a201a6

Update app.py

Files changed (1)
  1. app.py +39 -3
app.py CHANGED
@@ -51,10 +51,14 @@ system_instructions1 = """
 Keep conversation friendly, short, clear, and concise.
 Avoid unnecessary introductions and answer the user's questions directly.
 Respond in a normal, conversational manner while being friendly and helpful.
+Remember previous parts of the conversation and use that context in your responses.
 [USER]
 """
 
+conversation_history = []
+
 def models(text, model="Llama 3B Service", seed=42):
+    global conversation_history
     seed = int(randomize_seed_fn(seed))
     generator = torch.Generator().manual_seed(seed)
 
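Note that `conversation_history` lives at module level, so every session of the Space shares one history, and the `global` writes below are not safe under concurrent requests. A minimal sketch of a per-session alternative using Gradio's `gr.State` (the `chat` function and its echo reply are illustrative stand-ins, not part of this commit):

    import gradio as gr

    def chat(text, history):
        # One history list per browser session, carried through gr.State
        history.append({"role": "user", "content": text})
        reply = f"Echo: {text}"  # stand-in for the real model call
        history.append({"role": "assistant", "content": reply})
        return reply, history[-20:]  # same 20-message cap as the commit

    with gr.Blocks() as demo:
        state = gr.State([])  # Gradio gives each session its own copy
        box = gr.Textbox(label="Message")
        out = gr.Textbox(label="Reply")
        box.submit(chat, inputs=[box, state], outputs=[out, state])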
@@ -63,25 +67,48 @@ def models(text, model="Llama 3B Service", seed=42):
     if "Llama 3B Service" in model:
         messages = [
             {"role": "system", "content": system_instructions1},
+        ] + conversation_history + [
             {"role": "user", "content": text}
         ]
         completion = client.chat.completions.create(
             model="/data/shared/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/c4a54320a52ed5f88b7a2f84496903ea4ff07b45/",
             messages=messages
         )
-        return completion.choices[0].message.content
+        assistant_response = completion.choices[0].message.content
+
+        # Update conversation history
+        conversation_history.append({"role": "user", "content": text})
+        conversation_history.append({"role": "assistant", "content": assistant_response})
+
+        # Keep only the last 20 messages (10 exchanges) to avoid token limit issues
+        if len(conversation_history) > 20:
+            conversation_history = conversation_history[-20:]
+
+        return assistant_response
     else:
+        # For other models, we'll concatenate the conversation history into a single string
+        history_text = "\n".join([f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}" for msg in conversation_history])
+        formatted_prompt = f"{system_instructions1}\n\nConversation history:\n{history_text}\n\nUser: {text}\nOPTIMUS:"
+
         generate_kwargs = dict(
             max_new_tokens=300,
             seed=seed
         )
-        formatted_prompt = system_instructions1 + text + "[OPTIMUS]"
         stream = client.text_generation(
             formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "</s>":
                 output += response.token.text
+
+        # Update conversation history
+        conversation_history.append({"role": "user", "content": text})
+        conversation_history.append({"role": "assistant", "content": output})
+
+        # Keep only the last 20 messages (10 exchanges) to avoid token limit issues
+        if len(conversation_history) > 20:
+            conversation_history = conversation_history[-20:]
+
         return output
 
 async def respond(audio, model, seed):
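Both branches now append the user turn and the reply after generation and trim to the last 20 messages. In the fallback branch the history is flattened into plain text; a quick standalone check of the prompt it produces (the sample history and system prompt here are made up for illustration):

    # Reproduces the fallback branch's prompt construction outside the app.
    conversation_history = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello! How can I help?"},
    ]
    system_instructions1 = "<system prompt>"  # placeholder
    text = "What did I just say?"

    history_text = "\n".join(
        f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}"
        for msg in conversation_history
    )
    formatted_prompt = (
        f"{system_instructions1}\n\nConversation history:\n{history_text}"
        f"\n\nUser: {text}\nOPTIMUS:"
    )
    print(formatted_prompt)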
@@ -123,7 +150,7 @@ def translate_speech(audio_file, target_language):
         "--tgt_lang", language_code,
         "--model_name", "seamless_expressivity",
         "--vocoder_name", "vocoder_pretssel",
-        "--gated-model-dir", "models",
+        "--gated-model-dir", "seamlessmodel",
         "--output_path", output_file
     ]
 
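This hunk only repoints the gated-model directory from "models" to "seamlessmodel". The call site is not shown in the diff; assuming the argument list is handed to a seamless_expressivity CLI via subprocess, the invocation would look roughly like this (the entry point name and paths are assumptions, not from the commit):

    import subprocess

    audio_file = "input.wav"        # placeholder paths for illustration
    output_file = "translated.wav"
    language_code = "fra"

    command = [
        "expressivity_predict", audio_file,    # assumed entry point
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",  # was "models" before this commit
        "--output_path", output_file,
    ]
    subprocess.run(command, check=True)  # raises CalledProcessError on failure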
@@ -166,6 +193,15 @@ with gr.Blocks(css="style.css") as demo:
         interactive=False,
         autoplay=True,
         elem_classes="audio")
+    clear_button = gr.Button("Clear Conversation History")
+
+    def clear_history():
+        global conversation_history
+        conversation_history = []
+        return "Conversation history cleared."
+
+    clear_button.click(fn=clear_history, inputs=[], outputs=gr.Textbox())
+
     gr.Interface(
         fn=respond,
         inputs=[input, select, seed],
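One nit on the new button wiring: `outputs=gr.Textbox()` constructs the status box inline, so it renders unlabeled at whatever point in the layout the `.click()` call runs. A sketch of a more explicit wiring with the same behavior (layout simplified, names illustrative):

    import gradio as gr

    conversation_history = []

    def clear_history():
        global conversation_history
        conversation_history = []
        return "Conversation history cleared."

    with gr.Blocks() as demo:
        clear_button = gr.Button("Clear Conversation History")
        status = gr.Textbox(label="Status", interactive=False)  # named handle
        clear_button.click(fn=clear_history, inputs=[], outputs=status)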