Spaces: Runtime error
????????wtf
app.py CHANGED
@@ -4,7 +4,7 @@ import os
 import datetime
 import asyncio
 import aiohttp
-from aiohttp import ClientSession
+from aiohttp import ClientSession, ClientTimeout
 
 API_URL = os.environ.get('API_URL')
 API_KEY = os.environ.get('API_KEY')
@@ -24,8 +24,6 @@ DEFAULT_PARAMS = {
     "max_tokens": 512
 }
 
-active_tasks = {}
-
 def get_timestamp():
     return datetime.datetime.now().strftime("%H:%M:%S")
 
@@ -71,11 +69,13 @@ async def predict(message, history, system_prompt, temperature, top_p, top_k, fr
     }
 
     try:
-
+        timeout = ClientTimeout(total=60)  # Set a 60-second timeout
+        async with ClientSession(timeout=timeout) as session:
             async with session.post(API_URL, headers=headers, json=data) as response:
                 partial_message = ""
                 async for line in response.content:
                     if asyncio.current_task().cancelled():
+                        print("Task cancelled during API request")
                         break
                     if line:
                         line = line.decode('utf-8')
@@ -95,6 +95,9 @@ async def predict(message, history, system_prompt, temperature, top_p, top_k, fr
                         if partial_message:
                             yield partial_message
 
+    except asyncio.TimeoutError:
+        print("Request timed out")
+        yield "Request timed out. Please try again."
     except Exception as e:
         print(f"Request error: {e}")
         yield f"An error occurred: {str(e)}"
@@ -135,9 +138,70 @@ def export_chat(history, system_prompt):
 
 def sanitize_chatbot_history(history):
     """Ensure each entry in the chatbot history is a tuple of two items."""
-    return [tuple(entry[:2]) for entry in history]
+    return [tuple(entry[:2]) if isinstance(entry, (list, tuple)) else (str(entry), None) for entry in history]
+
+async def user(user_message, history):
+    history = sanitize_chatbot_history(history or [])
+    return "", history + [(user_message, None)]
+
+async def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info):
+    history = sanitize_chatbot_history(history or [])
+    if not history:
+        yield history
+        return
+    user_message = history[-1][0]
+    bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens)
+    history[-1] = (history[-1][0], "")
+    task = asyncio.current_task()
+    task_info['task'] = task
+    task_info['stop_requested'] = False
+    try:
+        async for chunk in bot_message:
+            if task_info.get('stop_requested', False):
+                print("Stop requested, breaking the loop")
+                break
+            history[-1] = (history[-1][0], chunk)
+            yield history
+    except asyncio.CancelledError:
+        print("Bot generation cancelled")
+    except GeneratorExit:
+        print("Generator exited")
+    except Exception as e:
+        print(f"Error in bot generation: {e}")
+    finally:
+        if history[-1][1] == "":
+            history[-1] = (history[-1][0], " [Generation stopped]")
+        task_info['task'] = None
+        task_info['stop_requested'] = False
+        yield history
+
+async def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info):
+    if 'task' in task_info and task_info['task']:
+        print("Cancelling previous task")
+        task_info['stop_requested'] = True
+        task_info['task'].cancel()
+
+    await asyncio.sleep(0.1)
+
+    history = sanitize_chatbot_history(history or [])
+    if history:
+        history[-1] = (history[-1][0], None)
+        try:
+            async for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info):
+                yield sanitize_chatbot_history(new_history)
+        except Exception as e:
+            print(f"Error in regenerate_response: {e}")
+            yield history
+    else:
+        yield []
+
+def import_chat_wrapper(custom_format_string):
+    imported_history, imported_system_prompt = import_chat(custom_format_string)
+    return sanitize_chatbot_history(imported_history), imported_system_prompt
 
 with gr.Blocks(theme='gradio/monochrome') as demo:
+    task_info = gr.State({'task': None, 'stop_requested': False})
+
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(value=[])
@@ -163,85 +227,43 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
         repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
         max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")
 
-    async def user(user_message, history):
-        history = sanitize_chatbot_history(history or [])
-        return "", history + [(user_message, None)]
-
-    async def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-        history = sanitize_chatbot_history(history or [])
-        if not history:
-            yield history
-            return
-        user_message = history[-1][0]
-        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens)
-        history[-1] = (history[-1][0], "")  # Ensure it's a tuple
-        task_id = id(asyncio.current_task())
-        active_tasks[task_id] = asyncio.current_task()
-        try:
-            async for chunk in bot_message:
-                if task_id not in active_tasks:
-                    break
-                history[-1] = (history[-1][0], chunk)  # Update as a tuple
-                yield history
-        except asyncio.CancelledError:
-            pass
-        finally:
-            if task_id in active_tasks:
-                del active_tasks[task_id]
-            if history[-1][1] == "":
-                history[-1] = (history[-1][0], " [Generation stopped]")
-            yield history
-
-    async def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-        # Cancel any ongoing generation
-        for task in list(active_tasks.values()):
-            task.cancel()
-
-        # Wait for a short time to ensure cancellation is processed
-        await asyncio.sleep(0.1)
-
-        history = sanitize_chatbot_history(history or [])
-        if history:
-            history[-1] = (history[-1][0], None)  # Reset last response
-            async for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-                yield new_history
-        else:
-            yield []
-
-    def import_chat_wrapper(custom_format_string):
-        imported_history, imported_system_prompt = import_chat(custom_format_string)
-        return sanitize_chatbot_history(imported_history), imported_system_prompt
-
     submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens], chatbot,
-        concurrency_limit=
+        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info], chatbot,
+        concurrency_limit=10
     )
 
     clear.click(lambda: [], None, chatbot, queue=False)
 
     regenerate_event = regenerate.click(
         regenerate_response,
-        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens],
+        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info],
         chatbot,
-        concurrency_limit=
+        concurrency_limit=10
    )
 
-    import_button.click(import_chat_wrapper, inputs=[import_textbox], outputs=[chatbot, system_prompt], concurrency_limit=
+    import_button.click(import_chat_wrapper, inputs=[import_textbox], outputs=[chatbot, system_prompt], concurrency_limit=10)
 
     export_button.click(
         export_chat,
         inputs=[chatbot, system_prompt],
         outputs=[import_textbox],
-        concurrency_limit=
+        concurrency_limit=10
    )
 
+    def stop_generation(task_info):
+        if 'task' in task_info and task_info['task']:
+            print("Stop requested")
+            task_info['stop_requested'] = True
+            task_info['task'].cancel()
+        return task_info
+
     stop_btn.click(
-
-
-
+        stop_generation,
+        inputs=[task_info],
+        outputs=[task_info],
         cancels=[submit_event, regenerate_event],
         queue=False
    )
 
 if __name__ == "__main__":
-    demo.launch(debug=True, max_threads=
+    demo.launch(debug=True, max_threads=40)
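
The heart of this commit is moving cancellation state out of the module-level active_tasks dict and into a per-session gr.State dict (task_info) that is threaded through bot, regenerate_response, and the new stop_generation handler. Below is a minimal, self-contained sketch of that pattern; fake_stream is a placeholder standing in for the real streaming predict call, and everything except the gr.State/task_info wiring is illustrative only:

# Sketch only: per-session cancellation via gr.State, as in the diff above.
# fake_stream is a hypothetical stand-in for the real streaming API call.
import asyncio
import gradio as gr

async def fake_stream():
    # Pretend tokens arrive slowly from an API.
    for i in range(20):
        await asyncio.sleep(0.2)
        yield f"token {i} "

async def generate(history, task_info):
    # Record this session's task so the Stop button can cancel it.
    task_info['task'] = asyncio.current_task()
    task_info['stop_requested'] = False
    history = (history or []) + [("prompt", "")]
    partial = ""
    try:
        async for chunk in fake_stream():
            if task_info.get('stop_requested'):
                break
            partial += chunk
            history[-1] = (history[-1][0], partial)
            yield history
    except asyncio.CancelledError:
        pass
    finally:
        task_info['task'] = None
        yield history

def stop_generation(task_info):
    # Flip the flag and cancel the task recorded for this session only.
    if task_info.get('task'):
        task_info['stop_requested'] = True
        task_info['task'].cancel()
    return task_info

with gr.Blocks() as demo:
    task_info = gr.State({'task': None, 'stop_requested': False})
    chatbot = gr.Chatbot(value=[])
    go = gr.Button("Generate")
    stop = gr.Button("Stop")
    gen_event = go.click(generate, [chatbot, task_info], chatbot)
    stop.click(stop_generation, [task_info], [task_info], cancels=[gen_event], queue=False)

if __name__ == "__main__":
    demo.launch()

Because the dict lives in gr.State, each browser session gets its own copy, so two users streaming at once no longer cancel each other's tasks, which the shared active_tasks dict allowed.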
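The other fix is bounding the request itself: ClientTimeout(total=60) caps the whole POST, including the streamed body, and the new except asyncio.TimeoutError branch turns a timeout into a user-visible message instead of an unhandled error. A condensed sketch of just that path (the URL and payload are placeholders):

# Sketch only: aiohttp total timeout around a streamed response.
import asyncio
import aiohttp

async def fetch_stream(url: str, payload: dict):
    timeout = aiohttp.ClientTimeout(total=60)  # 60 s budget for the whole request
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, json=payload) as response:
                async for line in response.content:
                    yield line.decode('utf-8')
    except asyncio.TimeoutError:
        # Raised when the total budget is exhausted, even mid-stream.
        yield "Request timed out. Please try again."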