Update app.py
app.py CHANGED
@@ -84,88 +84,44 @@
 import gradio as gr
 from huggingface_hub import InferenceClient

-# Step 1: Read your background info
-with open("BACKGROUND.md", "r", encoding="utf-8") as f:
-    background_text = f.read()
-
-# Step 2: Set up your InferenceClient (using text-generation instead of chat)
 client = InferenceClient("google/gemma-2-2b-jpn-it")

-def respond(
-    message,
-    history: list[dict],
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-):
-    """
-    Merges 'system_message', 'background_text', and conversation 'history'
-    into a single text prompt, then calls client.text_generation(...)
-    for a response.
-    """
     if history is None:
         history = []

-
-
-    for interaction in history:
-        if "user" in interaction:
-            prompt += f"User: {interaction['user']}\n"
-        if "assistant" in interaction:
-            prompt += f"Assistant: {interaction['assistant']}\n"
-    # Add the latest user query
-    prompt += f"User: {message}\nAssistant:"  # We'll generate the Assistant's text after this
-
-    # Generate response using text_generation in streaming mode
-    response = ""
-    # The text returned will include the entire prompt + new text,
-    # so we’ll need to subtract out the prompt length to isolate the new portion.
-    prompt_length = len(prompt)

     for chunk in client.text_generation(
         prompt=prompt,
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-        stream=True,
     ):
-        #
-
-        # The newly generated portion is what's after the original prompt
-        new_text = full_text[prompt_length:]
-        response += new_text
-        prompt_length = len(full_text)  # update for next chunk
         yield response

-    #
-    print("
-    print(prompt)
-    print("----- END PROMPT -----")


-# Step 3: Build a Gradio Blocks interface with two Tabs
 with gr.Blocks() as demo:
     with gr.Tab("Gemma Chat Agent"):
-        gr.Markdown("## Welcome to Varun's GPT Agent")
-        gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
         chat = gr.ChatInterface(
             fn=respond,
             additional_inputs=[
                 gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                 gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p
             ],
-            type="messages",
         )

-    # Optional: If you want a separate tab to display background_text
-    # with gr.Tab("Varun's Background"):
-    #     gr.Markdown("# About Varun")
-    #     gr.Markdown(background_text)
-
-# Step 4: Launch
 if __name__ == "__main__":
     demo.launch()
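Why the streaming loop changed: the old body sliced each update with `new_text = full_text[prompt_length:]`, but `full_text` was never assigned inside the loop, and `prompt` itself was never initialized before the `+=` lines, so the function could not run as written. With `huggingface_hub`'s `InferenceClient.text_generation(..., stream=True)` and the default `details=False`, each iteration should yield only the newly generated text as a plain string, so no prompt-length bookkeeping is needed. A minimal standalone sketch of that pattern (the model ID is taken from this file; the prompt and token budget are illustrative):

from huggingface_hub import InferenceClient

client = InferenceClient("google/gemma-2-2b-jpn-it")

response = ""
# stream=True with the default details=False yields plain strings,
# each containing only newly generated text, never the prompt itself.
for chunk in client.text_generation(
    prompt="User: Hello!\nAssistant:",
    max_new_tokens=32,
    stream=True,
):
    response += chunk  # accumulate incrementally, as the updated respond() does

print(response)

The updated hunk, as committed: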
 import gradio as gr
 from huggingface_hub import InferenceClient

 client = InferenceClient("google/gemma-2-2b-jpn-it")

+def respond(message, history, system_message, max_tokens, temperature, top_p):
     if history is None:
         history = []

+    prompt = f"{system_message}\n\n# Background...\n\n"  # etc.
+    # Build up your prompt from history...

+    response = ""
     for chunk in client.text_generation(
         prompt=prompt,
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True,
     ):
+        # 'chunk' is a string of newly generated text.
+        response += chunk
         yield response

+    # (Optional) log the final prompt
+    print("PROMPT:", prompt)


 with gr.Blocks() as demo:
     with gr.Tab("Gemma Chat Agent"):
         chat = gr.ChatInterface(
             fn=respond,
             additional_inputs=[
                 gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                 gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
             ],
+            type="messages",
         )

 if __name__ == "__main__":
     demo.launch()
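The committed version leaves prompt construction as a comment, and it also removes the old `BACKGROUND.md` read without re-adding it, so the background text has to come from somewhere else. One possible way to fill in that elided step, as a sketch: with `gr.ChatInterface(type="messages")`, `history` arrives as a list of dicts with "role" and "content" keys, which is also why the old loop's `"user"`/`"assistant"` key checks would never match. `BACKGROUND_TEXT` below is a hypothetical placeholder, not part of the committed code.

def build_prompt(message, history, system_message):
    # Hypothetical stand-in for the background info the old revision
    # loaded from BACKGROUND.md; supply it however suits your Space.
    BACKGROUND_TEXT = "..."

    prompt = f"{system_message}\n\n# Background\n{BACKGROUND_TEXT}\n\n"
    # With type="messages", each history entry is a dict with
    # "role" ("user" or "assistant") and "content" keys.
    for turn in history:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        prompt += f"{speaker}: {turn['content']}\n"
    # Leave the prompt open so the model continues as the Assistant.
    prompt += f"User: {message}\nAssistant:"
    return prompt

Inside respond(), a call like `prompt = build_prompt(message, history, system_message)` would replace the two placeholder lines.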