vmagotr1 committed
Commit c758ec0 · verified · 1 parent: a3716db

Update app.py

Files changed (1):
  app.py (+122, -33)
app.py CHANGED
@@ -1,3 +1,86 @@
+# import gradio as gr
+# from huggingface_hub import InferenceClient
+
+# # Step 1: Read your background info
+# with open("BACKGROUND.md", "r", encoding="utf-8") as f:
+#     background_text = f.read()
+
+# # Step 2: Set up your InferenceClient (same as before)
+# client = InferenceClient("google/gemma-2-2b-jpn-it")
+# # HuggingFaceH4/zephyr-7b-beta
+# def respond(
+#     message,
+#     history: list[dict],
+#     system_message: str,
+#     max_tokens: int,
+#     temperature: float,
+#     top_p: float,
+# ):
+#     if history is None:
+#         history = []
+
+#     # Include background text as part of the system message for context
+#     combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"
+
+#     # Start building the conversation history
+#     messages = [{"role": "system", "content": combined_system_message}]
+
+#     # Add conversation history
+#     for interaction in history:
+#         if "user" in interaction:
+#             messages.append({"role": "user", "content": interaction["user"]})
+#         if "assistant" in interaction:
+#             messages.append({"role": "assistant", "content": interaction["assistant"]})
+
+#     # Add the latest user message
+#     messages.append({"role": "user", "content": message})
+
+#     # Generate response
+#     response = ""
+#     for msg in client.chat_completion(
+#         messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+
+#         token = msg.choices[0].delta.content
+#         response += token
+#         yield response
+#     print("----- SYSTEM MESSAGE -----")
+#     print(messages[0]["content"])
+#     print("----- FULL MESSAGES LIST -----")
+#     for m in messages:
+#         print(m)
+#     print("-------------------------")
+
+# # Step 3: Build a Gradio Blocks interface with two Tabs
+# with gr.Blocks() as demo:
+#     # Tab 1: GPT Chat Agent
+#     with gr.Tab("GPT Chat Agent"):
+#         gr.Markdown("## Welcome to Varun's GPT Agent")
+#         gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
+#         chat = gr.ChatInterface(
+#             fn=respond,
+#             additional_inputs=[
+#                 gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+#                 gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+#             ],
+#             type="messages",  # Specify message type
+#         )
+
+# # # Tab 2: Background Document
+# # with gr.Tab("Varun's Background"):
+# #     gr.Markdown("# About Varun")
+# #     gr.Markdown(background_text)
+
+# # Step 4: Launch
+# if __name__ == "__main__":
+#     demo.launch()
+
 import gradio as gr
 from huggingface_hub import InferenceClient
 
@@ -5,9 +88,9 @@ from huggingface_hub import InferenceClient
 with open("BACKGROUND.md", "r", encoding="utf-8") as f:
     background_text = f.read()
 
-# Step 2: Set up your InferenceClient (same as before)
+# Step 2: Set up your InferenceClient (using text-generation instead of chat)
 client = InferenceClient("google/gemma-2-2b-jpn-it")
-# HuggingFaceH4/zephyr-7b-beta
+
 def respond(
     message,
     history: list[dict],
@@ -16,49 +99,54 @@ def respond(
     temperature: float,
     top_p: float,
 ):
+    """
+    Merges 'system_message', 'background_text', and conversation 'history'
+    into a single text prompt, then calls client.text_generation(...)
+    for a response.
+    """
     if history is None:
         history = []
 
-    # Include background text as part of the system message for context
-    combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"
-
-    # Start building the conversation history
-    messages = [{"role": "system", "content": combined_system_message}]
-
-    # Add conversation history
+    # Combine system instructions + background + prior conversation + new user message
+    prompt = f"{system_message}\n\n### Background Information ###\n{background_text}\n\n"
     for interaction in history:
         if "user" in interaction:
-            messages.append({"role": "user", "content": interaction["user"]})
+            prompt += f"User: {interaction['user']}\n"
         if "assistant" in interaction:
-            messages.append({"role": "assistant", "content": interaction["assistant"]})
+            prompt += f"Assistant: {interaction['assistant']}\n"
+    # Add the latest user query
+    prompt += f"User: {message}\nAssistant:"  # We'll generate the Assistant's text after this
 
-    # Add the latest user message
-    messages.append({"role": "user", "content": message})
-
-    # Generate response
+    # Generate response using text_generation in streaming mode
     response = ""
-    for msg in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    # The text returned will include the entire prompt + new text,
+    # so we’ll need to subtract out the prompt length to isolate the new portion.
+    prompt_length = len(prompt)
+
+    for chunk in client.text_generation(
+        prompt=prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True,  # streaming each chunk
     ):
-
-        token = msg.choices[0].delta.content
-        response += token
+        # Each chunk is a dict like {"generated_text": "full text so far..."}
+        full_text = chunk["generated_text"]
+        # The newly generated portion is what's after the original prompt
+        new_text = full_text[prompt_length:]
+        response += new_text
+        prompt_length = len(full_text)  # update for next chunk
         yield response
-    print("----- SYSTEM MESSAGE -----")
-    print(messages[0]["content"])
-    print("----- FULL MESSAGES LIST -----")
-    for m in messages:
-        print(m)
-    print("-------------------------")
-
+
+    # For debugging: show what we actually sent
+    print("----- FULL PROMPT -----")
+    print(prompt)
+    print("----- END PROMPT -----")
+
+
 # Step 3: Build a Gradio Blocks interface with two Tabs
 with gr.Blocks() as demo:
-    # Tab 1: GPT Chat Agent
-    with gr.Tab("GPT Chat Agent"):
+    with gr.Tab("Gemma Chat Agent"):
         gr.Markdown("## Welcome to Varun's GPT Agent")
         gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
         chat = gr.ChatInterface(
@@ -69,10 +157,10 @@ with gr.Blocks() as demo:
                 gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                 gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
             ],
-            type="messages",  # Specify message type
+            type="messages",  # Gradio will keep track of (user, assistant) messages in history
         )
 
-    # # Tab 2: Background Document
+    # Optional: If you want a separate tab to display background_text
     # with gr.Tab("Varun's Background"):
     #     gr.Markdown("# About Varun")
     #     gr.Markdown(background_text)
@@ -80,3 +168,4 @@ with gr.Blocks() as demo:
 # Step 4: Launch
 if __name__ == "__main__":
     demo.launch()
+
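Note on the new prompt format: to make concrete what respond() now sends to the model, here is a minimal sketch of the assembled prompt for a one-turn conversation. The sample values are invented; the f-strings are copied verbatim from the diff, and the {"user": ..., "assistant": ...} history shape is the one the diff's key checks assume, not a documented Gradio format.

# Hypothetical inputs, for illustration only
system_message = "You are a friendly Chatbot."
background_text = "<contents of BACKGROUND.md>"
history = [{"user": "What does Varun do?", "assistant": "..."}]
message = "Tell me more."

# Prompt assembly, copied from the new respond()
prompt = f"{system_message}\n\n### Background Information ###\n{background_text}\n\n"
for interaction in history:
    if "user" in interaction:
        prompt += f"User: {interaction['user']}\n"
    if "assistant" in interaction:
        prompt += f"Assistant: {interaction['assistant']}\n"
prompt += f"User: {message}\nAssistant:"

# Resulting prompt:
#   You are a friendly Chatbot.
#
#   ### Background Information ###
#   <contents of BACKGROUND.md>
#
#   User: What does Varun do?
#   Assistant: ...
#   User: Tell me more.
#   Assistant: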
 
 
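A related caveat on history handling: with type="messages", Gradio's ChatInterface passes history as OpenAI-style dicts with "role" and "content" keys, so the diff's `if "user" in interaction` / `if "assistant" in interaction` checks would not match any entries and prior turns would silently drop out of the prompt. A hedged sketch of converting that format into the app's User:/Assistant: transcript (the helper name history_to_prompt is mine, not from the app):

# Hedged sketch, assuming type="messages" history entries look like
# {"role": "user" | "assistant", "content": "..."}  (hypothetical helper)
def history_to_prompt(history: list[dict]) -> str:
    prompt = ""
    for turn in history or []:
        label = "User" if turn.get("role") == "user" else "Assistant"
        prompt += f"{label}: {turn.get('content', '')}\n"
    return prompt

# Example usage:
print(history_to_prompt([
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]))
# -> User: Hi
#    Assistant: Hello!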