Corvius committed
Commit c58b4ed · verified · 1 Parent(s): c7a2372

Update app.py

Files changed (1): app.py +76 -71
app.py CHANGED
@@ -1,11 +1,10 @@
 import gradio as gr
-import aiohttp
-import asyncio
 import json
 import os
 import datetime
-import time
-from concurrent.futures import ThreadPoolExecutor
+import asyncio
+import aiohttp
+from aiohttp import ClientSession
 
 API_URL = os.environ.get('API_URL')
 API_KEY = os.environ.get('API_KEY')
@@ -25,15 +24,12 @@ DEFAULT_PARAMS = {
     "max_tokens": 512
 }
 
-thread_pool = ThreadPoolExecutor(max_workers=10)
+active_tasks = {}
 
 def get_timestamp():
     return datetime.datetime.now().strftime("%H:%M:%S")
 
-should_stop = False
-
 async def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-    global should_stop
     history_format = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         history_format.append({"role": "user", "content": human})
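Note: the change above swaps the global should_stop flag (and the now-unused ThreadPoolExecutor) for an active_tasks registry keyed by id(task). A minimal standalone sketch of that pattern, with a hypothetical generate() coroutine standing in for the app's bot():

import asyncio

active_tasks = {}

async def generate():
    # Hypothetical stand-in for the app's bot(): register this task,
    # stream some work, and stop if the registry entry disappears.
    task = asyncio.current_task()
    task_id = id(task)
    active_tasks[task_id] = task
    try:
        for i in range(10):
            if task_id not in active_tasks:  # cooperative stop
                break
            await asyncio.sleep(0.1)  # a hard cancel() is raised here
            print(f"chunk {i}")
    except asyncio.CancelledError:
        pass
    finally:
        active_tasks.pop(task_id, None)

async def main():
    task = asyncio.create_task(generate())
    await asyncio.sleep(0.25)
    # What the Stop button's lambda does: cancel every registered task.
    for t in list(active_tasks.values()):
        t.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass

asyncio.run(main())

Removing a task from the registry lets the streaming loop stop cooperatively; calling cancel() additionally interrupts any pending await.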
@@ -56,7 +52,7 @@ async def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
     }
 
     non_default_params = {k: v for k, v in current_params.items() if v != DEFAULT_PARAMS[k]}
-
+
     if non_default_params and not message.startswith(('*', '"')):
         for param, value in non_default_params.items():
             print(f"{param}={value}")
@@ -74,28 +70,34 @@ async def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
         "max_tokens": max_tokens
     }
 
-    async with aiohttp.ClientSession() as session:
-        async with session.post(API_URL, headers=headers, json=data) as response:
-            partial_message = ""
-            async for line in response.content:
-                if should_stop:
-                    break
-                line = line.decode('utf-8')
-                if line.startswith("data: "):
-                    if line.strip() == "data: [DONE]":
+    try:
+        async with ClientSession() as session:
+            async with session.post(API_URL, headers=headers, json=data) as response:
+                partial_message = ""
+                async for line in response.content:
+                    if asyncio.current_task().cancelled():
                         break
-                    try:
-                        json_data = json.loads(line[6:])
-                        if 'choices' in json_data and json_data['choices']:
-                            content = json_data['choices'][0]['delta'].get('content', '')
-                            if content:
-                                partial_message += content
-                                yield partial_message
-                    except json.JSONDecodeError:
-                        continue
-
-    if partial_message:
-        yield partial_message
+                    if line:
+                        line = line.decode('utf-8')
+                        if line.startswith("data: "):
+                            if line.strip() == "data: [DONE]":
+                                break
+                            try:
+                                json_data = json.loads(line[6:])
+                                if 'choices' in json_data and json_data['choices']:
+                                    content = json_data['choices'][0]['delta'].get('content', '')
+                                    if content:
+                                        partial_message += content
+                                        yield partial_message
+                            except json.JSONDecodeError:
+                                continue
+
+                if partial_message:
+                    yield partial_message
+
+    except Exception as e:
+        print(f"Request error: {e}")
+        yield f"An error occurred: {str(e)}"
 
 def import_chat(custom_format_string):
     try:
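Note: the streaming loop above consumes OpenAI-style server-sent events: every payload line starts with "data: ", carries a JSON chunk whose choices[0].delta may contain a content fragment, and "data: [DONE]" marks the end of the stream. A minimal sketch of the same parsing over canned input (the sample payloads are illustrative, not captured API output):

import json

sample_lines = [
    'data: {"choices": [{"delta": {"content": "Hel"}}]}',
    'data: {"choices": [{"delta": {"content": "lo"}}]}',
    'data: [DONE]',
]

partial_message = ""
for line in sample_lines:
    if line.startswith("data: "):
        if line.strip() == "data: [DONE]":
            break
        try:
            json_data = json.loads(line[6:])  # strip the "data: " prefix
            if 'choices' in json_data and json_data['choices']:
                content = json_data['choices'][0]['delta'].get('content', '')
                partial_message += content
        except json.JSONDecodeError:
            continue

print(partial_message)  # -> Hello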
@@ -131,12 +133,11 @@ def export_chat(history, system_prompt):
             export_data += f"<|assistant|> {assistant_msg}\n\n"
     return export_data
 
-def stop_generation():
-    global should_stop
-    should_stop = True
-    return gr.update(interactive=True), gr.update(interactive=True)
+def sanitize_chatbot_history(history):
+    """Ensure each entry in the chatbot history is a tuple of two items."""
+    return [tuple(entry[:2]) for entry in history]
 
-with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+with gr.Blocks(theme='gradio/monochrome') as demo:
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(value=[])
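Note: sanitize_chatbot_history exists because Gradio hands chat history back as lists of [user, assistant] pairs, while the updated handlers treat every entry as a 2-tuple. A quick illustration of what it normalizes:

def sanitize_chatbot_history(history):
    """Ensure each entry in the chatbot history is a tuple of two items."""
    return [tuple(entry[:2]) for entry in history]

print(sanitize_chatbot_history([["hi", "hello!"], ("next turn", None)]))
# -> [('hi', 'hello!'), ('next turn', None)]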
@@ -162,81 +163,85 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
                 repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
                 max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")
 
-    def user(user_message, history):
-        history = history or []
-        return "", history + [[user_message, None]]
+    async def user(user_message, history):
+        history = sanitize_chatbot_history(history or [])
+        return "", history + [(user_message, None)]
 
     async def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-        global should_stop
-        should_stop = False
-        history = history or []
+        history = sanitize_chatbot_history(history or [])
         if not history:
             yield history
             return
         user_message = history[-1][0]
         bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens)
-        history[-1][1] = ""
+        history[-1] = (history[-1][0], "")  # Ensure it's a tuple
+        task_id = id(asyncio.current_task())
+        active_tasks[task_id] = asyncio.current_task()
         try:
             async for chunk in bot_message:
-                if should_stop:
+                if task_id not in active_tasks:
                     break
-                history[-1][1] = chunk
+                history[-1] = (history[-1][0], chunk)  # Update as a tuple
                 yield history
-        except Exception as e:
-            print(f"Error in bot function: {str(e)}")
-            history[-1][1] = "An error occurred while generating the response."
-            yield history
+        except asyncio.CancelledError:
+            pass
         finally:
-            should_stop = False
+            if task_id in active_tasks:
+                del active_tasks[task_id]
+            if history[-1][1] == "":
+                history[-1] = (history[-1][0], " [Generation stopped]")
+            yield history
 
     async def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-        global should_stop
-        should_stop = False
-        if history and len(history) > 0:
-            last_user_message = history[-1][0]
-            history[-1][1] = None
+        # Cancel any ongoing generation
+        for task in list(active_tasks.values()):
+            task.cancel()
+
+        # Wait for a short time to ensure cancellation is processed
+        await asyncio.sleep(0.1)
+
+        history = sanitize_chatbot_history(history or [])
+        if history:
+            history[-1] = (history[-1][0], None)  # Reset last response
             async for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-                if should_stop:
-                    break
                 yield new_history
         else:
             yield []
-        should_stop = False
 
     def import_chat_wrapper(custom_format_string):
         imported_history, imported_system_prompt = import_chat(custom_format_string)
-        return imported_history, imported_system_prompt
+        return sanitize_chatbot_history(imported_history), imported_system_prompt
 
     submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens], chatbot,
-        concurrency_limit=10
+        concurrency_limit=5
     )
 
-    clear.click(lambda: None, None, chatbot, queue=False)
+    clear.click(lambda: [], None, chatbot, queue=False)
 
     regenerate_event = regenerate.click(
         regenerate_response,
         [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens],
         chatbot,
-        concurrency_limit=10
+        concurrency_limit=5
     )
 
-    stop_btn.click(
-        stop_generation,
-        inputs=[],
-        outputs=[msg, regenerate],
-        cancels=[submit_event, regenerate_event],
-        queue=False
-    )
-
-    import_button.click(import_chat_wrapper, inputs=[import_textbox], outputs=[chatbot, system_prompt], queue=False)
+    import_button.click(import_chat_wrapper, inputs=[import_textbox], outputs=[chatbot, system_prompt], concurrency_limit=5)
 
     export_button.click(
         export_chat,
         inputs=[chatbot, system_prompt],
         outputs=[import_textbox],
+        concurrency_limit=5
+    )
+
+    stop_btn.click(
+        lambda: [task.cancel() for task in list(active_tasks.values())],
+        None,
+        None,
+        cancels=[submit_event, regenerate_event],
         queue=False
     )
 
 if __name__ == "__main__":
-    demo.launch(debug=True, server_name="0.0.0.0", server_port=7860, share=True, max_threads=40)
+    demo.launch(debug=True, max_threads=20)
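Note: one subtlety in the new cancellation path: Task.cancelled() only returns True once a task has actually finished cancelling, so the in-loop asyncio.current_task().cancelled() check in predict() will rarely fire while the coroutine is still running; the cancel request from the Stop button instead surfaces as a CancelledError at the next await, which is what bot() catches. A small demonstration of that behavior:

import asyncio

async def worker():
    try:
        while True:
            # Inside a running coroutine this stays False even after cancel():
            print("cancelled()?", asyncio.current_task().cancelled())
            await asyncio.sleep(0.1)  # the cancel request lands here as CancelledError
    except asyncio.CancelledError:
        print("caught CancelledError")
        raise

async def main():
    task = asyncio.create_task(worker())
    await asyncio.sleep(0.05)
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
    print("cancelled()?", task.cancelled())  # True only once the task has exited

asyncio.run(main())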
 