un-retard the stop button
app.py CHANGED
@@ -3,6 +3,7 @@ import requests
 import json
 import threading
 import os
+from requests.exceptions import RequestException

 stop_generation = threading.Event()
 API_URL = os.environ.get('API_URL')
@@ -13,8 +14,10 @@ headers = {
     "Content-Type": "application/json"
 }

+session = requests.Session()
+
 def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
-    global stop_generation
+    global stop_generation, session
     stop_generation.clear()

     history_format = [{"role": "system", "content": system_prompt}]
@@ -37,32 +40,34 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
         "max_tokens": max_tokens
     }

-
-
-
-
-
-
-    for line in response.iter_lines():
-        if stop_generation.is_set():
-            break
-        if line:
-            line = line.decode('utf-8')
-            if line.startswith("data: "):
-                if line.strip() == "data: [DONE]":
+    try:
+        with session.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
+            partial_message = ""
+            for line in response.iter_lines():
+                if stop_generation.is_set():
+                    response.close()
                     break
-
-
-                    if
-
-
-
-
-
-
-
-
-
+                if line:
+                    line = line.decode('utf-8')
+                    if line.startswith("data: "):
+                        if line.strip() == "data: [DONE]":
+                            break
+                        try:
+                            json_data = json.loads(line[6:])
+                            if 'choices' in json_data and json_data['choices']:
+                                content = json_data['choices'][0]['delta'].get('content', '')
+                                if content:
+                                    partial_message += content
+                                    yield partial_message
+                        except json.JSONDecodeError:
+                            continue
+
+        if partial_message:
+            yield partial_message
+
+    except RequestException as e:
+        print(f"Request error: {e}")
+        yield f"An error occurred: {str(e)}"

 def import_chat(custom_format_string):
     try:
@@ -98,9 +103,10 @@ def export_chat(history, system_prompt):
     return export_data

 def stop_generation_func():
-    global stop_generation
+    global stop_generation, session
     stop_generation.set()
-
+    session.close()
+    session = requests.Session()

 with gr.Blocks(theme='gradio/monochrome') as demo:
     with gr.Row():