wellborgmann committed on
Commit
b29204b
·
verified ·
1 Parent(s): 61f28aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -4,6 +4,7 @@ from huggingface_hub import InferenceClient
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
 
7
  client = InferenceClient("qwen2.5:0.5b")
8
 
9
 
@@ -30,18 +31,21 @@ def respond(
30
 
31
  try:
32
  # Chama a API de completamento com streaming
33
- for message in client.chat_completion(
34
- messages,
 
35
  max_tokens=max_tokens,
36
- stream=True,
37
  temperature=temperature,
38
  top_p=top_p,
39
- ):
 
 
 
40
  # Verifica se a resposta contém o conteúdo esperado
41
- if 'choices' not in message or len(message.choices) == 0 or 'delta' not in message.choices[0]:
42
  raise ValueError("Resposta inesperada do modelo.")
43
 
44
- token = message.choices[0].delta.content
45
  response += token # Acumula o conteúdo
46
 
47
  # Retorna a resposta incrementalmente
@@ -62,6 +66,7 @@ def respond(
62
  """
63
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
64
  """
 
65
  demo = gr.ChatInterface(
66
  respond,
67
  additional_inputs=[
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
+ # Criando o cliente para interagir com o modelo no Hugging Face.
8
  client = InferenceClient("qwen2.5:0.5b")
9
 
10
 
 
31
 
32
  try:
33
  # Chama a API de completamento com streaming
34
+ # A API do Hugging Face usa o método `client.chat_completion`.
35
+ response_stream = client.chat_completion(
36
+ messages=messages,
37
  max_tokens=max_tokens,
 
38
  temperature=temperature,
39
  top_p=top_p,
40
+ stream=True
41
+ )
42
+
43
+ for message in response_stream:
44
  # Verifica se a resposta contém o conteúdo esperado
45
+ if 'choices' not in message or len(message['choices']) == 0 or 'delta' not in message['choices'][0]:
46
  raise ValueError("Resposta inesperada do modelo.")
47
 
48
+ token = message['choices'][0]['delta']['content']
49
  response += token # Acumula o conteúdo
50
 
51
  # Retorna a resposta incrementalmente
 
66
  """
67
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
68
  """
69
+ # Criando a interface Gradio
70
  demo = gr.ChatInterface(
71
  respond,
72
  additional_inputs=[