Spaces:

SaisExperiments
/

Sad-Llama-3.2-3B

Running

App Files Community

SaisExperiments committed on Apr 2

Commit

43de95d

verified ·

1 Parent(s): a8e97ac

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -99

app.py CHANGED Viewed

@@ -1,146 +1,123 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import os
-# --- Installation Note ---
-# Ensure you have the necessary libraries installed:
-# pip install gradio huggingface_hub
-# --- Hugging Face Hub Token ---
-# The InferenceClient might require a Hugging Face Hub token for certain models or usage.
-# Set it as an environment variable HUGGING_FACE_HUB_TOKEN, or log in via `huggingface-cli login`.
-# If the model is public and doesn't require login, this might work without a token.
-# HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN") # Optional: explicitly get token if needed
-client = None
 try:
-    client = InferenceClient(
-        "HuggingFaceH4/zephyr-7b-beta",
-        # token=HUGGING_FACE_HUB_TOKEN # Uncomment if you want to pass token explicitly
-    )
-    print("InferenceClient initialized successfully.")
 except Exception as e:
     print(f"Error initializing InferenceClient: {e}")
-    print("Please ensure the model identifier is correct and you have necessary permissions/token.")
-    # You might want to exit or raise the error depending on your application structure
-    # For this Gradio app, we'll let the respond function handle the missing client.
 def respond(
     message: str,
-    history: list[tuple[str, str]],
-    system_message: str = "You are a friendly Chatbot.", # Default value matching UI
-    max_tokens: int = 512, # Default value matching UI
-    temperature: float = 0.7, # Default value matching UI
-    top_p: float = 0.95, # Default value matching UI
 ):
     """
-    Chat response function for the Gradio interface.
     """
-    # --- Client Check ---
-    if client is None:
-        yield "Error: InferenceClient could not be initialized. Please check server logs."
-        return # Stop generation if client is not available
-    # --- Input Validation (Basic) ---
-    if not message:
-        yield "Error: Please enter a message."
-        return
-    if not system_message:
-        system_message = "You are a helpful assistant." # Fallback system message
     messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
-    response_text = ""
     try:
-        # Stream the response
-        for message_chunk in client.chat_completion(
             messages=messages,
             max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
         ):
-            # Check if delta and content exist and are not None
-            token = message_chunk.choices[0].delta.content
-            # --- Robust Token Handling ---
-            if token is not None:
-                response_text += token
-                yield response_text # Yield the accumulated response incrementally
     except Exception as e:
-        print(f"Error during API call: {e}")
-        # Yield a user-friendly error message
-        yield f"An error occurred while generating the response: {e}"
-# --- Gradio Interface Definition ---
 demo = gr.ChatInterface(
     respond,
-    chatbot=gr.Chatbot(
-        height=500,
-        label="Zephyr 7B Beta",
-        show_label=True,
-        bubble_full_width=False, # Optional: Adjust bubble width
-    ),
-    title="🤖 Zephyr 7B Beta Chat",
-    description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API. \nEnter your message and adjust settings below.",
     examples=[
-        ["Hello, how are you today?"],
-        ["What is the capital of France?"],
-        ["Explain the concept of large language models in simple terms."],
-        ["Write a short poem about the rain."]
     ],
-    cache_examples=False, # Set to True to cache example results if desired
     additional_inputs=[
-        gr.Textbox(
-            value="You are a friendly and helpful chatbot.", # Default system message
-            label="System Message",
-            info="The instruction given to the chatbot to guide its behavior.",
-        ),
-        gr.Slider(
-            minimum=1,
-            maximum=2048,
-            value=512, # Default max tokens
-            step=1,
-            label="Max New Tokens",
-            info="Maximum number of tokens to generate."
-        ),
-        gr.Slider(
-            minimum=0.1,
-            # Max temperature adjusted: values > 1.0 often degrade quality
-            maximum=1.0,
-            value=0.7, # Default temperature
-            step=0.1,
-            label="Temperature",
-            info="Controls randomness. Lower values make output more focused, higher values make it more diverse."
-        ),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
-            value=0.95, # Default top-p
             step=0.05,
             label="Top-p (nucleus sampling)",
-            info="Considers only the most probable tokens with cumulative probability p. Helps prevent low-probability tokens."
         ),
     ],
-     additional_inputs_accordion_name="⚙️ Advanced Settings" # Group settings
 )
 if __name__ == "__main__":
-    # Launch the Gradio app
-    demo.launch(
-        # share=True # Uncomment to create a temporary public link (use with caution)
-        # server_name="0.0.0.0" # Uncomment to allow access from your local network
-        # auth=("user", "password") # Optional: Add basic authentication
-    )

 import gradio as gr
 from huggingface_hub import InferenceClient
+from huggingface_hub.inference_api import InferenceApiException
 import os
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+**Note:** You might need to authenticate with Hugging Face for this to work reliably.
+Run `huggingface-cli login` in your terminal or set the HUGGING_FACE_HUB_TOKEN environment variable.
+Alternatively, pass your token directly: InferenceClient(token="hf_YOUR_TOKEN")
+"""
+# Initialize the Inference Client
+# It will try to use HUGGING_FACE_HUB_TOKEN environment variable or cached login
 try:
+    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 except Exception as e:
     print(f"Error initializing InferenceClient: {e}")
+    # Optionally, provide a default token if needed and available
+    # token = os.getenv("HUGGING_FACE_HUB_TOKEN")
+    # if token:
+    #     client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)
+    # else:
+    #     raise ValueError("Could not initialize InferenceClient. Ensure you are logged in or provide a token.") from e
+    # For now, let's just raise it if initialization fails fundamentally
+    raise
 def respond(
     message: str,
+    history: list[tuple[str | None, str | None]],
+    system_message: str,
+    max_tokens: int,
+    temperature: float,
+    top_p: float,
 ):
     """
+    Generates a response using the Hugging Face Inference API.
+    Args:
+        message: The user's input message.
+        history: A list of tuples representing the conversation history.
+                 Each tuple is (user_message, bot_message).
+        system_message: The system prompt to guide the model.
+        max_tokens: The maximum number of new tokens to generate.
+        temperature: Controls randomness (higher = more random).
+        top_p: Nucleus sampling parameter.
+    Yields:
+        The generated response incrementally.
     """
     messages = [{"role": "system", "content": system_message}]
+    # Add conversation history
+    for user_msg, bot_msg in history:
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
+    # Add the latest user message
     messages.append({"role": "user", "content": message})
+    response = ""
     try:
+        # Start streaming the response
+        for msg_chunk in client.chat_completion(
             messages=messages,
             max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
         ):
+            # Check if there's content in the delta
+            token = msg_chunk.choices[0].delta.content
+            if token:  # Add check for empty/None token
+                response += token
+                yield response # Yield the accumulated response so far
+    except InferenceApiException as e:
+        print(f"Inference API Error: {e}")
+        yield f"Sorry, I encountered an error: {e}"
     except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        yield f"Sorry, an unexpected error occurred: {e}"
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
 demo = gr.ChatInterface(
     respond,
+    chatbot=gr.Chatbot(height=400), # Adjust chatbot height if desired
+    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
+    title="Zephyr 7B Beta Chat",
+    description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API.",
+    theme="soft", # Optional: Apply a theme
     examples=[
+        ["Hello!"],
+        ["Explain the concept of Large Language Models in simple terms."],
+        ["Write a short poem about the moon."],
     ],
+    cache_examples=False, # Set to True to cache example results
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear",
     additional_inputs=[
+        gr.Textbox(value="You are a friendly and helpful chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"), # Note: Max temp often capped lower (e.g., 1.0 or 2.0)
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
+            value=0.95,
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
     ],
+     additional_inputs_accordion=gr.Accordion(label="Advanced Options", open=False), # Group additional inputs
 )
 if __name__ == "__main__":
+    demo.launch()