Update app.py
Browse files
app.py
CHANGED
@@ -71,19 +71,18 @@ def generate(prompt, history=[], temperature=0.1, max_new_tokens=10000, top_p=0.
|
|
71 |
available_tokens = 32768 - total_tokens_used
|
72 |
|
73 |
if available_tokens <= 0:
|
74 |
-
|
|
|
75 |
|
76 |
formatted_prompt = format_prompt(prompt, history)
|
77 |
try:
|
78 |
stream = client.text_generation(formatted_prompt, temperature=temperature, max_new_tokens=min(max_new_tokens, available_tokens),
|
79 |
top_p=top_p, repetition_penalty=repetition_penalty, do_sample=True, seed=42, stream=True)
|
80 |
-
output = ""
|
81 |
for response in stream:
|
82 |
-
output
|
83 |
-
|
84 |
except Exception as e:
|
85 |
-
|
86 |
-
|
87 |
|
88 |
mychatbot = gr.Chatbot(
|
89 |
avatar_images=["./user.png", "./botm.png"],
|
|
|
71 |
available_tokens = 32768 - total_tokens_used
|
72 |
|
73 |
if available_tokens <= 0:
|
74 |
+
yield f"Error: 입력이 최대 허용 토큰 수를 초과합니다. Total tokens used: {total_tokens_used}"
|
75 |
+
return
|
76 |
|
77 |
formatted_prompt = format_prompt(prompt, history)
|
78 |
try:
|
79 |
stream = client.text_generation(formatted_prompt, temperature=temperature, max_new_tokens=min(max_new_tokens, available_tokens),
|
80 |
top_p=top_p, repetition_penalty=repetition_penalty, do_sample=True, seed=42, stream=True)
|
|
|
81 |
for response in stream:
|
82 |
+
output = response['generated_text'] if 'generated_text' in response else str(response)
|
83 |
+
yield f"{output}\n\n---\nTotal tokens used: {total_tokens_used}"
|
84 |
except Exception as e:
|
85 |
+
yield f"Error: {str(e)}\nTotal tokens used: {total_tokens_used}"
|
|
|
86 |
|
87 |
mychatbot = gr.Chatbot(
|
88 |
avatar_images=["./user.png", "./botm.png"],
|