Bhaskar2611 committed · verified
Commit b2ec231 · 1 Parent(s): 7f0753e

Update app.py

Files changed (1):
  1. app.py +15 -452
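
To review this exact revision outside the web UI, the file can be pinned by the commit hash shown above. A minimal sketch using huggingface_hub — the Space repo id is a placeholder, since only the author name is visible on this page:

    from huggingface_hub import hf_hub_download

    # Repo id is hypothetical; substitute the actual Space, e.g. "Bhaskar2611/<space-name>".
    path = hf_hub_download(
        repo_id="Bhaskar2611/<space-name>",
        filename="app.py",
        repo_type="space",
        revision="b2ec231",  # commit hash from this page's header
    )
    print(path)
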
app.py CHANGED
@@ -1,443 +1,3 @@
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- # """
- # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- # """
- # client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
-
-
- # def respond(
- #     message,
- #     history: list[tuple[str, str]],
- #     system_message,
- #     max_tokens,
- #     temperature,
- #     top_p,
- # ):
- #     messages = [{"role": "system", "content": system_message}]
-
- #     for val in history:
- #         if val[0]:
- #             messages.append({"role": "user", "content": val[0]})
- #         if val[1]:
- #             messages.append({"role": "assistant", "content": val[1]})
-
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
-
- #     for message in client.chat_completion(
- #         messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         token = message.choices[0].delta.content
-
- #         response += token
- #         yield response
-
-
- # """
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # """
- # demo = gr.ChatInterface(
- #     respond,
- #     additional_inputs=[
- #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
- #         gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
- #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- #         gr.Slider(
- #             minimum=0.1,
- #             maximum=1.0,
- #             value=0.95,
- #             step=0.05,
- #             label="Top-p (nucleus sampling)",
- #         ),
- #     ],
- # )
-
-
- # if __name__ == "__main__":
- #     demo.launch()
-
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- # client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
-
- # def respond(message, history: list[tuple[str, str]]):
- #     system_message = (
- #         "You are a helpful and experienced coding assistant specialized in web development. "
- #         "Help the user by generating complete and functional code for building websites. "
- #         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) based on their requirements. "
- #         "Break down the tasks clearly if needed, and be friendly and supportive in your responses.")
- #     max_tokens = 2048
- #     temperature = 0.7
- #     top_p = 0.95
-
- #     messages = [{"role": "system", "content": system_message}]
-
- #     for val in history:
- #         if val[0]:
- #             messages.append({"role": "user", "content": val[0]})
- #         if val[1]:
- #             messages.append({"role": "assistant", "content": val[1]})
-
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
-
- #     for message in client.chat_completion(
- #         messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         token = message.choices[0].delta.content
-
- #         response += token
- #         yield response
-
- # """
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # """
- # demo = gr.ChatInterface(respond)
-
- # if __name__ == "__main__":
- #     demo.launch()
-
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- # """
- # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- # """
- # client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
-
- # def respond(message, history: list[tuple[str, str]]):
- #     system_message = (
- #         "You are a helpful and experienced coding assistant specialized in web development. "
- #         "Help the user by generating complete and functional code for building websites. "
- #         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) based on their requirements. "
- #         "Break down the tasks clearly if needed, and be friendly and supportive in your responses."
- #     )
- #     max_tokens = 2048
- #     temperature = 0.7
- #     top_p = 0.95
-
- #     messages = [{"role": "system", "content": system_message}]
-
- #     for val in history:
- #         if val[0]:
- #             messages.append({"role": "user", "content": val[0]})
- #         if val[1]:
- #             messages.append({"role": "assistant", "content": val[1]})
-
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
-
- #     for message in client.chat_completion(
- #         messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         token = message.choices[0].delta.content
-
- #         response += token
- #         yield response
-
- # """
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # """
- # demo = gr.ChatInterface(respond)
-
- # if __name__ == "__main__":
- #     demo.launch()
-
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- # # 1. Instantiate with named model param
- # client = InferenceClient(model="Qwen/Qwen2.5-Coder-32B-Instruct")
-
- # def respond(message, history: list[tuple[str, str]]):
- #     system_message = (
- #         "You are a helpful and experienced coding assistant specialized in web development. "
- #         "Help the user by generating complete and functional code for building websites. "
- #         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
- #         "based on their requirements."
- #     )
- #     max_tokens = 2048
- #     temperature = 0.7
- #     top_p = 0.95
-
- #     # Build messages in OpenAI-compatible format
- #     messages = [{"role": "system", "content": system_message}]
- #     for user_msg, assistant_msg in history:
- #         if user_msg:
- #             messages.append({"role": "user", "content": user_msg})
- #         if assistant_msg:
- #             messages.append({"role": "assistant", "content": assistant_msg})
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
- #     # 2. Use named parameters and alias if desired
- #     for chunk in client.chat.completions.create(
- #         model="Qwen/Qwen2.5-Coder-32B-Instruct",
- #         messages=messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         # 3. Extract token content
- #         token = chunk.choices[0].delta.content or ""
- #         response += token
- #         yield response
-
- # # 4. Wire up Gradio chat interface
- # demo = gr.ChatInterface(respond, type="messages")
-
- # if __name__ == "__main__":
- #     demo.launch()
- # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- # hf_token = "HF_TOKEN"
-
-
- # # Ensure token is available
- # if hf_token is None:
- #     raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in .env file or environment.")
-
- # # Instantiate Hugging Face Inference Client with token
- # client = InferenceClient(
- #     model="Qwen/Qwen2.5-Coder-32B-Instruct",
- #     token=hf_token
- # )
-
- # def respond(message, history: list[tuple[str, str]]):
- #     system_message = (
- #         "You are a helpful and experienced coding assistant specialized in web development. "
- #         "Help the user by generating complete and functional code for building websites. "
- #         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
- #         "based on their requirements."
- #     )
- #     max_tokens = 2048
- #     temperature = 0.7
- #     top_p = 0.95
-
- #     # Build conversation history
- #     messages = [{"role": "system", "content": system_message}]
- #     for user_msg, assistant_msg in history:
- #         if user_msg:
- #             messages.append({"role": "user", "content": user_msg})
- #         if assistant_msg:
- #             messages.append({"role": "assistant", "content": assistant_msg})
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
- #     # Stream the response from the model
- #     for chunk in client.chat.completions.create(
- #         model="Qwen/Qwen2.5-Coder-32B-Instruct",
- #         messages=messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         token = chunk.choices[0].delta.content or ""
- #         response += token
- #         yield response
-
- # # Gradio UI
- # demo = gr.ChatInterface(respond, type="messages")
-
- # if __name__ == "__main__":
- #     demo.launch()
-
- # import gradio as gr
- # from transformers import AutoTokenizer, AutoModelForCausalLM
- # import torch
-
- # # Load once globally
- # tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
- # model = AutoModelForCausalLM.from_pretrained(
- #     "Qwen/Qwen2.5-Coder-32B-Instruct",
- #     device_map="auto",
- #     torch_dtype=torch.float16,
- # )
-
- # def respond(message, history):
- #     system_prompt = (
- #         "You are a helpful coding assistant specialized in web development. "
- #         "Provide complete code snippets for HTML, CSS, JS, Flask, Node.js etc."
- #     )
- #     # Build input prompt including chat history
- #     chat_history = ""
- #     for user_msg, bot_msg in history:
- #         chat_history += f"User: {user_msg}\nAssistant: {bot_msg}\n"
- #     prompt = f"{system_prompt}\n{chat_history}User: {message}\nAssistant:"
-
- #     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
- #     outputs = model.generate(
- #         **inputs,
- #         max_new_tokens=512,
- #         temperature=0.7,
- #         do_sample=True,
- #         top_p=0.95,
- #         eos_token_id=tokenizer.eos_token_id,
- #     )
- #     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- #     # Extract only the new response part after the prompt
- #     response = generated_text[len(prompt):].strip()
-
- #     # Append current Q/A to history
- #     history.append((message, response))
- #     return "", history
-
- # demo = gr.ChatInterface(respond, type="messages")
-
- # if __name__ == "__main__":
- #     demo.launch()
- # import os
- # import gradio as gr
- # from huggingface_hub import InferenceClient
- # from dotenv import load_dotenv
-
- # # Load .env variables (make sure to have HF_TOKEN in .env or set as env var)
- # load_dotenv()
- # HF_TOKEN = os.getenv("HF_TOKEN")  # or directly assign your token here as string
-
- # # Initialize InferenceClient with Hugging Face API token
- # client = InferenceClient(
- #     model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
- #     token=HF_TOKEN
- # )
-
- # def respond(message, history):
- #     """
- #     Chat response generator function streaming from Hugging Face Inference API.
- #     """
- #     system_message = (
- #         "You are a helpful and experienced coding assistant specialized in web development. "
- #         "Help the user by generating complete and functional code for building websites. "
- #         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
- #         "based on their requirements."
- #     )
- #     max_tokens = 2048
- #     temperature = 0.7
- #     top_p = 0.95
-
- #     # Prepare messages in OpenAI chat format
- #     messages = [{"role": "system", "content": system_message}]
- #     for user_msg, assistant_msg in history:
- #         if user_msg:
- #             messages.append({"role": "user", "content": user_msg})
- #         if assistant_msg:
- #             messages.append({"role": "assistant", "content": assistant_msg})
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
- #     # Stream response tokens from Hugging Face Inference API
- #     for chunk in client.chat.completions.create(
- #         model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
- #         messages=messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         token = chunk.choices[0].delta.get("content", "")
- #         response += token
- #         yield response
-
- # # Create Gradio chat interface
- # demo = gr.ChatInterface(fn=respond, title="Website Building Assistant")
-
- # if __name__ == "__main__":
- #     demo.launch()
- # import os
- # import gradio as gr
- # from huggingface_hub import InferenceClient
- # from dotenv import load_dotenv
-
- # # Load environment variables
- # load_dotenv()
- # HF_TOKEN = os.getenv("HF_TOKEN")  # Ensure this is set in .env
-
- # # Initialize Hugging Face Inference Client
- # client = InferenceClient(
- #     model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
- #     token=HF_TOKEN
- # )
-
- # # Define system instructions for the chatbot
- # system_message = (
- #     "You are a helpful and experienced coding assistant specialized in web development. "
- #     "Help the user by generating complete and functional code for building websites. "
- #     "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
- #     "based on their requirements."
- # )
-
- # # Define the response generation function
- # def respond(message, history):
- #     max_tokens = 2048
- #     temperature = 0.7
- #     top_p = 0.95
-
- #     # Convert chat history into OpenAI-style format
- #     messages = [{"role": "system", "content": system_message}]
- #     for item in history:
- #         role = item["role"]
- #         content = item["content"]
- #         messages.append({"role": role, "content": content})
-
- #     # Add the latest user message
- #     messages.append({"role": "user", "content": message})
-
- #     response = ""
-
- #     # Streaming response from the Hugging Face Inference API
- #     for chunk in client.chat.completions.create(
- #         model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
- #         messages=messages,
- #         max_tokens=max_tokens,
- #         stream=True,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         token = chunk.choices[0].delta.get("content")
- #         if token is not None:
- #             response += token
- #             yield response
-
- # # Create Gradio Chat Interface
- # demo = gr.ChatInterface(
- #     fn=respond,
- #     title="Website Building Assistant",
- #     chatbot=gr.Chatbot(show_label=False),
- #     type="openai",  # Use OpenAI-style message format
- # )
-
- # if __name__ == "__main__":
- #     demo.launch()# app.py
-
- # app.py
-
- # app.py
-
  # import os
  # import gradio as gr
  # from huggingface_hub import InferenceClient
@@ -449,7 +9,7 @@ For more information on `huggingface_hub` Inference API support, please check the docs
  
  # # Initialize Hugging Face Inference Client
  # client = InferenceClient(
- #     model="mistralai/Codestral-22B-v0.1",
+ #     model="mistralai/Mistral-7B-Instruct-v0.3",
  #     token=HF_TOKEN
  # )
  
@@ -461,7 +21,7 @@ For more information on `huggingface_hub` Inference API support, please check the docs
  #     "based on their requirements."
  # )
  
- # # Streaming chatbot logic using chat.completions
+ # # Streaming chatbot logic
  # def respond(message, history):
  #     # Prepare messages with system prompt
  #     messages = [{"role": "system", "content": system_message}]
@@ -472,7 +32,7 @@ For more information on `huggingface_hub` Inference API support, please check the docs
  #     # Stream response from the model
  #     response = ""
  #     for chunk in client.chat.completions.create(
- #         model="mistralai/Codestral-22B-v0.1",
+ #         model="mistralai/Mistral-7B-Instruct-v0.3",
  #         messages=messages,
  #         max_tokens=1024,
  #         temperature=0.7,
@@ -492,9 +52,6 @@ For more information on `huggingface_hub` Inference API support, please check the docs
  # if __name__ == "__main__":
  #     demo.launch()
  
-
- # app.py
-
  import os
  import gradio as gr
  from huggingface_hub import InferenceClient
@@ -506,7 +63,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
  
  # Initialize Hugging Face Inference Client
  client = InferenceClient(
-     model="mistralai/Mistral-7B-Instruct-v0.3",
+     model="Qwen/Qwen2.5-Coder-7B-Instruct",
      token=HF_TOKEN
  )
  
@@ -522,21 +79,27 @@ system_message = (
  def respond(message, history):
      # Prepare messages with system prompt
      messages = [{"role": "system", "content": system_message}]
-     for msg in history:
-         messages.append(msg)
+     for user_msg, assistant_msg in history:
+         messages.append({"role": "user", "content": user_msg})
+         messages.append({"role": "assistant", "content": assistant_msg})
      messages.append({"role": "user", "content": message})
  
      # Stream response from the model
      response = ""
      for chunk in client.chat.completions.create(
-         model="mistralai/Mistral-7B-Instruct-v0.3",
+         model="Qwen/Qwen2.5-Coder-7B-Instruct",
          messages=messages,
-         max_tokens=1024,
+         max_tokens=2048,
          temperature=0.7,
          top_p=0.95,
          stream=True,
      ):
-         token = chunk.choices[0].delta.get("content", "") or ""
+         # Safely handle empty choices
+         if not chunk.choices:
+             continue
+
+         # Safely extract token content
+         token = chunk.choices[0].delta.content or ""
          response += token
          yield response
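
For readers who want to try the updated logic end to end, here is a minimal, self-contained sketch assembled from the "+" and unchanged lines in the diff above. The system prompt text and the Gradio wiring (the ChatInterface and launch calls) fall outside the hunks shown on this page, so those parts are assumptions; HF_TOKEN is expected in the environment, and the history unpacking assumes Gradio's tuple-style chat history.

    import os
    import gradio as gr
    from huggingface_hub import InferenceClient

    HF_TOKEN = os.getenv("HF_TOKEN")

    # Initialize Hugging Face Inference Client (model name taken from the diff above)
    client = InferenceClient(
        model="Qwen/Qwen2.5-Coder-7B-Instruct",
        token=HF_TOKEN,
    )

    # Assumed system prompt: the real one is defined outside the hunks shown here.
    system_message = (
        "You are a helpful and experienced coding assistant specialized in web development."
    )

    def respond(message, history):
        # Prepare messages with system prompt
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        # Stream response from the model
        response = ""
        for chunk in client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-7B-Instruct",
            messages=messages,
            max_tokens=2048,
            temperature=0.7,
            top_p=0.95,
            stream=True,
        ):
            # Skip chunks that carry no choices, then append the streamed token
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response

    # Assumed wiring: a plain ChatInterface whose tuple history matches the loop above.
    demo = gr.ChatInterface(respond)

    if __name__ == "__main__":
        demo.launch()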