Spaces:

AugustLight
/

LLight-3.2-3b-Instruct

Sleeping

App Files Files Community

AugustLight committed on Oct 26, 2024

Commit

0062f54

verified ·

1 Parent(s): 501365f

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -17

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 model = None
 def load_model():
@@ -44,33 +45,28 @@ def respond(message, history, system_message, max_new_tokens, temperature, top_p
         print(f"Генерируем ответ для контекста длиной {len(context)} символов")
-        # Используем потоковый вывод
         response = model(
             prompt=context,
             max_tokens=max_new_tokens,
             temperature=temperature,
             top_p=top_p,
             stop=["User:", "\n\n", "<|endoftext|>"],
-            echo=False,
-            stream=True  # Включаем потоковое отображение
         )
-        # Генерация ответа с использованием yield
-        generated_text = ""
-        for token in response:
-            generated_text += token["text"]
-            yield generated_text.strip()
     except Exception as e:
         error_msg = f"Произошла ошибка: {str(e)}"
         print(error_msg)
-        yield error_msg
-demo = gr.Interface(
-    fn=respond,
-    inputs=[
-        gr.Textbox(lines=2, label="Сообщение пользователя"),
-        gr.State(),
         gr.Textbox(
             value="Ты дружелюбный и полезный ассистент. Отвечай обдуманно и по делу.",
             label="System message"
@@ -97,10 +93,14 @@ demo = gr.Interface(
             label="Top-p (nucleus sampling)"
         ),
     ],
-    outputs="text",
     title="GGUF Chat Model",
     description="Чат с GGUF моделью (LLight-3.2-3B-Instruct)",
-    live=True  # Включаем потоковую генерацию ответа
 )
 # Запускаем приложение
@@ -112,4 +112,4 @@ if __name__ == "__main__":
     except Exception as e:
         print(f"Ошибка при инициализации: {str(e)}")
-    demo.launch()

 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+# Так надо
 model = None
 def load_model():
         print(f"Генерируем ответ для контекста длиной {len(context)} символов")
         response = model(
             prompt=context,
             max_tokens=max_new_tokens,
             temperature=temperature,
             top_p=top_p,
             stop=["User:", "\n\n", "<|endoftext|>"],
+            echo=False  # Не возвращать промпт в ответе
         )
+        generated_text = response['choices'][0]['text']
+        print(f"Ответ сгенерирован успешно, длина: {len(generated_text)}")
+        return generated_text.strip()
     except Exception as e:
         error_msg = f"Произошла ошибка: {str(e)}"
         print(error_msg)
+        return error_msg
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
         gr.Textbox(
             value="Ты дружелюбный и полезный ассистент. Отвечай обдуманно и по делу.",
             label="System message"
             label="Top-p (nucleus sampling)"
         ),
     ],
     title="GGUF Chat Model",
     description="Чат с GGUF моделью (LLight-3.2-3B-Instruct)",
+    examples=[
+        ["Привет! Как дела?"],
+        ["Расскажи мне о себе"],
+        ["Что ты умеешь делать?"]
+    ],
+    cache_examples=False
 )
 # Запускаем приложение
     except Exception as e:
         print(f"Ошибка при инициализации: {str(e)}")
+    demo.launch()