SaisExperiments committed
Commit 43de95d · verified · 1 Parent(s): a8e97ac

Update app.py

Files changed (1): app.py (+76 -99)
app.py CHANGED
@@ -1,146 +1,123 @@
  import gradio as gr
  from huggingface_hub import InferenceClient
  import os

- # --- Installation Note ---
- # Ensure you have the necessary libraries installed:
- # pip install gradio huggingface_hub

- # --- Hugging Face Hub Token ---
- # The InferenceClient might require a Hugging Face Hub token for certain models or usage.
- # Set it as an environment variable HUGGING_FACE_HUB_TOKEN, or log in via `huggingface-cli login`.
- # If the model is public and doesn't require login, this might work without a token.
- # HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN")  # Optional: explicitly get token if needed
- client = None
  try:
-     client = InferenceClient(
-         "HuggingFaceH4/zephyr-7b-beta",
-         # token=HUGGING_FACE_HUB_TOKEN  # Uncomment if you want to pass token explicitly
-     )
-     print("InferenceClient initialized successfully.")
  except Exception as e:
      print(f"Error initializing InferenceClient: {e}")
-     print("Please ensure the model identifier is correct and you have necessary permissions/token.")
-     # You might want to exit or raise the error depending on your application structure
-     # For this Gradio app, we'll let the respond function handle the missing client.
-

  def respond(
      message: str,
-     history: list[tuple[str, str]],
-     system_message: str = "You are a friendly Chatbot.",  # Default value matching UI
-     max_tokens: int = 512,  # Default value matching UI
-     temperature: float = 0.7,  # Default value matching UI
-     top_p: float = 0.95,  # Default value matching UI
  ):
      """
-     Chat response function for the Gradio interface.
      """
-     # --- Client Check ---
-     if client is None:
-         yield "Error: InferenceClient could not be initialized. Please check server logs."
-         return  # Stop generation if client is not available
-
-     # --- Input Validation (Basic) ---
-     if not message:
-         yield "Error: Please enter a message."
-         return
-     if not system_message:
-         system_message = "You are a helpful assistant."  # Fallback system message
-
      messages = [{"role": "system", "content": system_message}]

-     for user_msg, assistant_msg in history:
          if user_msg:
              messages.append({"role": "user", "content": user_msg})
-         if assistant_msg:
-             messages.append({"role": "assistant", "content": assistant_msg})

      messages.append({"role": "user", "content": message})

-     response_text = ""
-
      try:
-         # Stream the response
-         for message_chunk in client.chat_completion(
              messages=messages,
              max_tokens=max_tokens,
              stream=True,
              temperature=temperature,
              top_p=top_p,
          ):
-             # Check if delta and content exist and are not None
-             token = message_chunk.choices[0].delta.content
-
-             # --- Robust Token Handling ---
-             if token is not None:
-                 response_text += token
-                 yield response_text  # Yield the accumulated response incrementally
-
      except Exception as e:
-         print(f"Error during API call: {e}")
-         # Yield a user-friendly error message
-         yield f"An error occurred while generating the response: {e}"


- # --- Gradio Interface Definition ---
  demo = gr.ChatInterface(
      respond,
-     chatbot=gr.Chatbot(
-         height=500,
-         label="Zephyr 7B Beta",
-         show_label=True,
-         bubble_full_width=False,  # Optional: Adjust bubble width
-     ),
-     title="🤖 Zephyr 7B Beta Chat",
-     description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API. \nEnter your message and adjust settings below.",
      examples=[
-         ["Hello, how are you today?"],
-         ["What is the capital of France?"],
-         ["Explain the concept of large language models in simple terms."],
-         ["Write a short poem about the rain."]
      ],
-     cache_examples=False,  # Set to True to cache example results if desired
      additional_inputs=[
-         gr.Textbox(
-             value="You are a friendly and helpful chatbot.",  # Default system message
-             label="System Message",
-             info="The instruction given to the chatbot to guide its behavior.",
-         ),
-         gr.Slider(
-             minimum=1,
-             maximum=2048,
-             value=512,  # Default max tokens
-             step=1,
-             label="Max New Tokens",
-             info="Maximum number of tokens to generate."
-         ),
-         gr.Slider(
-             minimum=0.1,
-             # Max temperature adjusted: values > 1.0 often degrade quality
-             maximum=1.0,
-             value=0.7,  # Default temperature
-             step=0.1,
-             label="Temperature",
-             info="Controls randomness. Lower values make output more focused, higher values make it more diverse."
-         ),
          gr.Slider(
              minimum=0.1,
              maximum=1.0,
-             value=0.95,  # Default top-p
              step=0.05,
              label="Top-p (nucleus sampling)",
-             info="Considers only the most probable tokens with cumulative probability p. Helps prevent low-probability tokens."
          ),
      ],
-     additional_inputs_accordion_name="⚙️ Advanced Settings"  # Group settings
  )


  if __name__ == "__main__":
-     # Launch the Gradio app
-     demo.launch(
-         # share=True  # Uncomment to create a temporary public link (use with caution)
-         # server_name="0.0.0.0"  # Uncomment to allow access from your local network
-         # auth=("user", "password")  # Optional: Add basic authentication
-     )
 
  import gradio as gr
  from huggingface_hub import InferenceClient
+ from huggingface_hub.utils import HfHubHTTPError
  import os

+ """
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+
+ **Note:** You might need to authenticate with Hugging Face for this to work reliably.
+ Run `huggingface-cli login` in your terminal or set the HUGGING_FACE_HUB_TOKEN environment variable.
+ Alternatively, pass your token directly: InferenceClient(token="hf_YOUR_TOKEN")
+ """
+ # Initialize the Inference Client.
+ # It will try to use the HUGGING_FACE_HUB_TOKEN environment variable or a cached login.
  try:
+     client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
  except Exception as e:
      print(f"Error initializing InferenceClient: {e}")
+     # Optionally, provide a default token if needed and available:
+     # token = os.getenv("HUGGING_FACE_HUB_TOKEN")
+     # if token:
+     #     client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)
+     # else:
+     #     raise ValueError("Could not initialize InferenceClient. Ensure you are logged in or provide a token.") from e
+     # For now, let's just raise it if initialization fails fundamentally.
+     raise

  def respond(
      message: str,
+     history: list[tuple[str | None, str | None]],
+     system_message: str,
+     max_tokens: int,
+     temperature: float,
+     top_p: float,
  ):
      """
+     Generates a response using the Hugging Face Inference API.
+
+     Args:
+         message: The user's input message.
+         history: A list of tuples representing the conversation history.
+             Each tuple is (user_message, bot_message).
+         system_message: The system prompt to guide the model.
+         max_tokens: The maximum number of new tokens to generate.
+         temperature: Controls randomness (higher = more random).
+         top_p: Nucleus sampling parameter.
+
+     Yields:
+         The generated response incrementally.
      """
      messages = [{"role": "system", "content": system_message}]

+     # Add conversation history
+     for user_msg, bot_msg in history:
          if user_msg:
              messages.append({"role": "user", "content": user_msg})
+         if bot_msg:
+             messages.append({"role": "assistant", "content": bot_msg})

+     # Add the latest user message
      messages.append({"role": "user", "content": message})

+     response = ""
      try:
+         # Start streaming the response
+         for msg_chunk in client.chat_completion(
              messages=messages,
              max_tokens=max_tokens,
              stream=True,
              temperature=temperature,
              top_p=top_p,
          ):
+             # Check if there's content in the delta
+             token = msg_chunk.choices[0].delta.content
+             if token:  # Add check for empty/None token
+                 response += token
+                 yield response  # Yield the accumulated response so far
+
+     except HfHubHTTPError as e:
+         print(f"Inference API error: {e}")
+         yield f"Sorry, I encountered an error: {e}"
      except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         yield f"Sorry, an unexpected error occurred: {e}"


+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
  demo = gr.ChatInterface(
      respond,
+     chatbot=gr.Chatbot(height=400),  # Adjust chatbot height if desired
+     textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
+     title="Zephyr 7B Beta Chat",
+     description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API.",
+     theme="soft",  # Optional: Apply a theme
      examples=[
+         ["Hello!"],
+         ["Explain the concept of Large Language Models in simple terms."],
+         ["Write a short poem about the moon."],
      ],
+     cache_examples=False,  # Set to True to cache example results
+     retry_btn="Retry",
+     undo_btn="Undo",
+     clear_btn="Clear",
      additional_inputs=[
+         gr.Textbox(value="You are a friendly and helpful chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),  # Note: max temp often capped lower (e.g., 1.0 or 2.0)
          gr.Slider(
              minimum=0.1,
              maximum=1.0,
+             value=0.95,
              step=0.05,
              label="Top-p (nucleus sampling)",
          ),
      ],
+     additional_inputs_accordion=gr.Accordion(label="Advanced Options", open=False),  # Group additional inputs
  )


  if __name__ == "__main__":
+     demo.launch()
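
A quick way to sanity-check the streaming pattern this commit settles on, outside of Gradio: the sketch below exercises the same `client.chat_completion(..., stream=True)` call as the new `respond` function. It assumes a valid Hugging Face token is available (via `huggingface-cli login` or the HUGGING_FACE_HUB_TOKEN environment variable) and that the model is still served by the Inference API.

from huggingface_hub import InferenceClient

# Assumes authentication is already set up, as the note in app.py describes.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

messages = [
    {"role": "system", "content": "You are a friendly and helpful chatbot."},
    {"role": "user", "content": "Hello!"},
]

# Same pattern as respond(): each streamed chunk carries a delta whose
# `content` may be None or empty, so guard before using it.
for chunk in client.chat_completion(messages=messages, max_tokens=64, stream=True):
    token = chunk.choices[0].delta.content
    if token:
        print(token, end="", flush=True)
print()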
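
Because `respond` is a generator that yields the accumulated reply after each token, it can also be smoke-tested without launching the UI. A minimal sketch, assuming this commit's app.py is importable from the working directory (importing it builds the interface but does not launch it, since demo.launch() is guarded by __main__):

from app import respond  # the app.py from this commit

# History is a list of (user_message, bot_message) tuples, per the docstring.
history = [("Hi there!", "Hello! How can I help?")]

final = ""
for partial in respond(
    "Write a short poem about the moon.",
    history,
    "You are a friendly and helpful chatbot.",  # system_message
    128,   # max_tokens
    0.7,   # temperature
    0.95,  # top_p
):
    final = partial  # each yield replaces the previous partial reply

print(final)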