Spaces:

AugustLight
/

LLight-3.2-3b-Instruct

Sleeping

AugustLight committed on Oct 26, 2024

Commit

32b5fa7

verified ·

1 Parent(s): 0062f54

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,23 +45,26 @@ def respond(message, history, system_message, max_new_tokens, temperature, top_p
         print(f"Генерируем ответ для контекста длиной {len(context)} символов")
-        response = model(
             prompt=context,
             max_tokens=max_new_tokens,
             temperature=temperature,
             top_p=top_p,
             stop=["User:", "\n\n", "<|endoftext|>"],
-            echo=False  # Не возвращать промпт в ответе
-        )
-        generated_text = response['choices'][0]['text']
-        print(f"Ответ сгенерирован успешно, длина: {len(generated_text)}")
-        return generated_text.strip()
     except Exception as e:
         error_msg = f"Произошла ошибка: {str(e)}"
         print(error_msg)
-        return error_msg
 demo = gr.ChatInterface(
@@ -93,8 +96,8 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)"
         ),
     ],
-    title="GGUF Chat Model",
-    description="Чат с GGUF моделью (LLight-3.2-3B-Instruct)",
     examples=[
         ["Привет! Как дела?"],
         ["Расскажи мне о себе"],

         print(f"Генерируем ответ для контекста длиной {len(context)} символов")
+        # Используем генерацию с потоком
+        for response in model(
             prompt=context,
             max_tokens=max_new_tokens,
             temperature=temperature,
             top_p=top_p,
             stop=["User:", "\n\n", "<|endoftext|>"],
+            echo=False,  # Не возвращать промпт в ответе
+            stream=True  # Включаем потоковую передачу
+        ):
+            generated_text = response['choices'][0]['text']
+            print(f"Промежуточный ответ: {generated_text}")
+            yield generated_text  # Отправляем промежуточный результат
+        print("Ответ сгенерирован полностью.")
     except Exception as e:
         error_msg = f"Произошла ошибка: {str(e)}"
         print(error_msg)
+        yield error_msg
 demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)"
         ),
     ],
+    title="Llight Chat",
+    description="Чат с LLight-3.2-3B-Instruct",
     examples=[
         ["Привет! Как дела?"],
         ["Расскажи мне о себе"],