Update app.py
app.py
CHANGED
@@ -51,10 +51,14 @@ system_instructions1 = """
 Keep conversation friendly, short, clear, and concise.
 Avoid unnecessary introductions and answer the user's questions directly.
 Respond in a normal, conversational manner while being friendly and helpful.
+Remember previous parts of the conversation and use that context in your responses.
 [USER]
 """
 
+conversation_history = []
+
 def models(text, model="Llama 3B Service", seed=42):
+    global conversation_history
     seed = int(randomize_seed_fn(seed))
     generator = torch.Generator().manual_seed(seed)
 
@@ -63,25 +67,48 @@ def models(text, model="Llama 3B Service", seed=42):
     if "Llama 3B Service" in model:
         messages = [
             {"role": "system", "content": system_instructions1},
+        ] + conversation_history + [
             {"role": "user", "content": text}
         ]
         completion = client.chat.completions.create(
             model="/data/shared/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/c4a54320a52ed5f88b7a2f84496903ea4ff07b45/",
             messages=messages
         )
-
+        assistant_response = completion.choices[0].message.content
+
+        # Update conversation history
+        conversation_history.append({"role": "user", "content": text})
+        conversation_history.append({"role": "assistant", "content": assistant_response})
+
+        # Keep only the last 20 messages (10 exchanges) to avoid token limit issues
+        if len(conversation_history) > 20:
+            conversation_history = conversation_history[-20:]
+
+        return assistant_response
     else:
+        # For other models, concatenate the conversation history into a single prompt string
+        history_text = "\n".join([f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}" for msg in conversation_history])
+        formatted_prompt = f"{system_instructions1}\n\nConversation history:\n{history_text}\n\nUser: {text}\nOPTIMUS:"
+
         generate_kwargs = dict(
             max_new_tokens=300,
             seed=seed
         )
-        formatted_prompt = system_instructions1 + text + "[OPTIMUS]"
         stream = client.text_generation(
             formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""
         for response in stream:
             if not response.token.text == "</s>":
                 output += response.token.text
+
+        # Update conversation history
+        conversation_history.append({"role": "user", "content": text})
+        conversation_history.append({"role": "assistant", "content": output})
+
+        # Keep only the last 20 messages (10 exchanges) to avoid token limit issues
+        if len(conversation_history) > 20:
+            conversation_history = conversation_history[-20:]
+
         return output
 
 async def respond(audio, model, seed):
@@ -123,7 +150,7 @@ def translate_speech(audio_file, target_language):
         "--tgt_lang", language_code,
         "--model_name", "seamless_expressivity",
         "--vocoder_name", "vocoder_pretssel",
-        "--gated-model-dir", "
+        "--gated-model-dir", "seamlessmodel",
         "--output_path", output_file
     ]
 
@@ -166,6 +193,15 @@ with gr.Blocks(css="style.css") as demo:
         interactive=False,
         autoplay=True,
         elem_classes="audio")
+    clear_button = gr.Button("Clear Conversation History")
+
+    def clear_history():
+        global conversation_history
+        conversation_history = []
+        return "Conversation history cleared."
+
+    clear_button.click(fn=clear_history, inputs=[], outputs=gr.Textbox())
+
     gr.Interface(
         fn=respond,
         inputs=[input, select, seed],
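For reference, the bounded conversation-history pattern that the updated models() relies on can be exercised on its own. The sketch below is a minimal illustration, not the app's actual code: fake_generate is a hypothetical stand-in for the client.chat.completions.create call, and the 20-message cap mirrors the trim in the diff above.

# Minimal sketch of a bounded, in-memory conversation history.
# `fake_generate` is a hypothetical stand-in for the real model call.
conversation_history = []   # alternating {"role": "user"/"assistant", ...} messages
MAX_MESSAGES = 20           # 10 user/assistant exchanges, as in the diff above

def fake_generate(messages):
    # Placeholder for client.chat.completions.create(...).choices[0].message.content
    return f"(reply to: {messages[-1]['content']})"

def chat(system_prompt, user_text):
    global conversation_history
    messages = (
        [{"role": "system", "content": system_prompt}]
        + conversation_history
        + [{"role": "user", "content": user_text}]
    )
    reply = fake_generate(messages)
    conversation_history.append({"role": "user", "content": user_text})
    conversation_history.append({"role": "assistant", "content": reply})
    # Trim so the prompt stays within the model's context window
    if len(conversation_history) > MAX_MESSAGES:
        conversation_history = conversation_history[-MAX_MESSAGES:]
    return reply

if __name__ == "__main__":
    for i in range(15):
        chat("You are OPTIMUS.", f"question {i}")
    print(len(conversation_history))  # 20: only the most recent exchanges survive

Because conversation_history is a module-level global here (as in the diff), it is shared by every session of the running app; Gradio's gr.State is the usual mechanism for keeping history per user session.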