vmagotr1 committed · verified
Commit b95201b · 1 Parent(s): c758ec0

Update app.py

Files changed (1):
  1. app.py +11 -55
app.py CHANGED
@@ -84,88 +84,44 @@
import gradio as gr
from huggingface_hub import InferenceClient

- # Step 1: Read your background info
- with open("BACKGROUND.md", "r", encoding="utf-8") as f:
-     background_text = f.read()
-
- # Step 2: Set up your InferenceClient (using text-generation instead of chat)
client = InferenceClient("google/gemma-2-2b-jpn-it")

- def respond(
-     message,
-     history: list[dict],
-     system_message: str,
-     max_tokens: int,
-     temperature: float,
-     top_p: float,
- ):
-     """
-     Merges 'system_message', 'background_text', and conversation 'history'
-     into a single text prompt, then calls client.text_generation(...)
-     for a response.
-     """
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
    if history is None:
        history = []

-     # Combine system instructions + background + prior conversation + new user message
-     prompt = f"{system_message}\n\n### Background Information ###\n{background_text}\n\n"
-     for interaction in history:
-         if "user" in interaction:
-             prompt += f"User: {interaction['user']}\n"
-         if "assistant" in interaction:
-             prompt += f"Assistant: {interaction['assistant']}\n"
-     # Add the latest user query
-     prompt += f"User: {message}\nAssistant:"  # We'll generate the Assistant's text after this
-
-     # Generate response using text_generation in streaming mode
-     response = ""
-     # The text returned will include the entire prompt + new text,
-     # so we'll need to subtract out the prompt length to isolate the new portion.
-     prompt_length = len(prompt)
+     prompt = f"{system_message}\n\n# Background...\n\n"  # etc.
+     # Build up your prompt from history...

+     response = ""
    for chunk in client.text_generation(
        prompt=prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
-         stream=True,  # streaming each chunk
+         stream=True,
    ):
-         # Each chunk is a dict like {"generated_text": "full text so far..."}
-         full_text = chunk["generated_text"]
-         # The newly generated portion is what's after the original prompt
-         new_text = full_text[prompt_length:]
-         response += new_text
-         prompt_length = len(full_text)  # update for next chunk
+         # 'chunk' is a string of newly generated text.
+         response += chunk
        yield response

-     # For debugging: show what we actually sent
-     print("----- FULL PROMPT -----")
-     print(prompt)
-     print("----- END PROMPT -----")
+     # (Optional) log the final prompt
+     print("PROMPT:", prompt)


- # Step 3: Build a Gradio Blocks interface with two Tabs
with gr.Blocks() as demo:
    with gr.Tab("Gemma Chat Agent"):
-         gr.Markdown("## Welcome to Varun's GPT Agent")
-         gr.Markdown("Feel free to ask questions about Varun's journey, skills, and more!")
        chat = gr.ChatInterface(
            fn=respond,
            additional_inputs=[
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
            ],
-             type="messages",  # Gradio will keep track of (user, assistant) messages in history
+             type="messages",
        )

-     # Optional: If you want a separate tab to display background_text
-     # with gr.Tab("Varun's Background"):
-     #     gr.Markdown("# About Varun")
-     #     gr.Markdown(background_text)
-
- # Step 4: Launch
if __name__ == "__main__":
    demo.launch()
127