Update app.py
app.py
CHANGED
@@ -51,10 +51,14 @@ system_instructions1 = """
 Keep conversation friendly, short, clear, and concise.
 Avoid unnecessary introductions and answer the user's questions directly.
 Respond in a normal, conversational manner while being friendly and helpful.
+Remember previous parts of the conversation and use that context in your responses.
 [USER]
 """
 
+conversation_history = []
+
 def models(text, model="Llama 3B Service", seed=42):
+    global conversation_history
     seed = int(randomize_seed_fn(seed))
     generator = torch.Generator().manual_seed(seed)
 
@@ -63,25 +67,48 @@ def models(text, model="Llama 3B Service", seed=42):
     if "Llama 3B Service" in model:
         messages = [
             {"role": "system", "content": system_instructions1},
+        ] + conversation_history + [
             {"role": "user", "content": text}
         ]
         completion = client.chat.completions.create(
             model="/data/shared/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/c4a54320a52ed5f88b7a2f84496903ea4ff07b45/",
             messages=messages
         )
-
+        assistant_response = completion.choices[0].message.content
+
+        # Update conversation history
+        conversation_history.append({"role": "user", "content": text})
+        conversation_history.append({"role": "assistant", "content": assistant_response})
+
+        # Keep only the last 20 messages (10 exchanges) to avoid token limit issues
+        if len(conversation_history) > 20:
+            conversation_history = conversation_history[-20:]
+
+        return assistant_response
     else:
+        # For other models, concatenate the conversation history into a single prompt string
+        history_text = "\n".join([f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}" for msg in conversation_history])
+        formatted_prompt = f"{system_instructions1}\n\nConversation history:\n{history_text}\n\nUser: {text}\nOPTIMUS:"
+
         generate_kwargs = dict(
             max_new_tokens=300,
             seed=seed
         )
-        formatted_prompt = system_instructions1 + text + "[OPTIMUS]"
         stream = client.text_generation(
             formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "</s>":
                 output += response.token.text
+
+        # Update conversation history
+        conversation_history.append({"role": "user", "content": text})
+        conversation_history.append({"role": "assistant", "content": output})
+
+        # Keep only the last 20 messages (10 exchanges) to avoid token limit issues
+        if len(conversation_history) > 20:
+            conversation_history = conversation_history[-20:]
+
         return output
 
 async def respond(audio, model, seed):
@@ -123,7 +150,7 @@ def translate_speech(audio_file, target_language):
         "--tgt_lang", language_code,
         "--model_name", "seamless_expressivity",
         "--vocoder_name", "vocoder_pretssel",
-        "--gated-model-dir", "
+        "--gated-model-dir", "seamlessmodel",
         "--output_path", output_file
     ]
 
@@ -166,6 +193,15 @@ with gr.Blocks(css="style.css") as demo:
         interactive=False,
         autoplay=True,
         elem_classes="audio")
+    clear_button = gr.Button("Clear Conversation History")
+
+    def clear_history():
+        global conversation_history
+        conversation_history = []
+        return "Conversation history cleared."
+
+    clear_button.click(fn=clear_history, inputs=[], outputs=gr.Textbox())
+
     gr.Interface(
         fn=respond,
         inputs=[input, select, seed],
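For reference, the bounded conversation-history pattern that the updated models() relies on can be exercised on its own. The sketch below is a minimal illustration, not the app's actual code: fake_generate is a hypothetical stand-in for the client.chat.completions.create call, and the 20-message cap mirrors the trim in the diff above.

# Minimal sketch of a bounded, in-memory conversation history.
# `fake_generate` is a hypothetical stand-in for the real model call.
conversation_history = []   # alternating {"role": "user"/"assistant", ...} messages
MAX_MESSAGES = 20           # 10 user/assistant exchanges, as in the diff above

def fake_generate(messages):
    # Placeholder for client.chat.completions.create(...).choices[0].message.content
    return f"(reply to: {messages[-1]['content']})"

def chat(system_prompt, user_text):
    global conversation_history
    messages = (
        [{"role": "system", "content": system_prompt}]
        + conversation_history
        + [{"role": "user", "content": user_text}]
    )
    reply = fake_generate(messages)
    conversation_history.append({"role": "user", "content": user_text})
    conversation_history.append({"role": "assistant", "content": reply})
    # Trim so the prompt stays within the model's context window
    if len(conversation_history) > MAX_MESSAGES:
        conversation_history = conversation_history[-MAX_MESSAGES:]
    return reply

if __name__ == "__main__":
    for i in range(15):
        chat("You are OPTIMUS.", f"question {i}")
    print(len(conversation_history))  # 20: only the most recent exchanges survive

Because conversation_history is a module-level global here (as in the diff), it is shared by every session of the running app; Gradio's gr.State is the usual mechanism for keeping history per user session.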