Bhaskar2611 committed
Commit e9d8fd0 · verified · 1 Parent(s): 06dd8c4

Update app.py

Files changed (1):
  1. app.py +91 -44
app.py CHANGED
@@ -210,57 +210,103 @@ For more information on `huggingface_hub` Inference API support, please check th
 
 # if __name__ == "__main__":
 #     demo.launch()
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-hf_token = "HF_TOKEN"
-
-# Ensure token is available
-if hf_token is None:
-    raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in .env file or environment.")
-
-# Instantiate Hugging Face Inference Client with token
-client = InferenceClient(
-    model="Qwen/Qwen2.5-Coder-32B-Instruct",
-    token=hf_token
-)
-
-def respond(message, history: list[tuple[str, str]]):
-    system_message = (
-        "You are a helpful and experienced coding assistant specialized in web development. "
-        "Help the user by generating complete and functional code for building websites. "
-        "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
-        "based on their requirements."
-    )
-    max_tokens = 2048
-    temperature = 0.7
-    top_p = 0.95
-
-    # Build conversation history
-    messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-    # Stream the response from the model
-    for chunk in client.chat.completions.create(
-        model="Qwen/Qwen2.5-Coder-32B-Instruct",
-        messages=messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = chunk.choices[0].delta.content or ""
-        response += token
-        yield response
-
-# Gradio UI
+# import gradio as gr
+# from huggingface_hub import InferenceClient
+
+# hf_token = "HF_TOKEN"
+
+# # Ensure token is available
+# if hf_token is None:
+#     raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in .env file or environment.")
+
+# # Instantiate Hugging Face Inference Client with token
+# client = InferenceClient(
+#     model="Qwen/Qwen2.5-Coder-32B-Instruct",
+#     token=hf_token
+# )
+
+# def respond(message, history: list[tuple[str, str]]):
+#     system_message = (
+#         "You are a helpful and experienced coding assistant specialized in web development. "
+#         "Help the user by generating complete and functional code for building websites. "
+#         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
+#         "based on their requirements."
+#     )
+#     max_tokens = 2048
+#     temperature = 0.7
+#     top_p = 0.95
+
+#     # Build conversation history
+#     messages = [{"role": "system", "content": system_message}]
+#     for user_msg, assistant_msg in history:
+#         if user_msg:
+#             messages.append({"role": "user", "content": user_msg})
+#         if assistant_msg:
+#             messages.append({"role": "assistant", "content": assistant_msg})
+#     messages.append({"role": "user", "content": message})
+
+#     response = ""
+#     # Stream the response from the model
+#     for chunk in client.chat.completions.create(
+#         model="Qwen/Qwen2.5-Coder-32B-Instruct",
+#         messages=messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = chunk.choices[0].delta.content or ""
+#         response += token
+#         yield response
+
+# # Gradio UI
+# demo = gr.ChatInterface(respond, type="messages")
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Load once globally
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+    device_map="auto",
+    torch_dtype=torch.float16,
+)
+
+def respond(message, history):
+    system_prompt = (
+        "You are a helpful coding assistant specialized in web development. "
+        "Provide complete code snippets for HTML, CSS, JS, Flask, Node.js etc."
+    )
+    # Build input prompt including chat history
+    chat_history = ""
+    for user_msg, bot_msg in history:
+        chat_history += f"User: {user_msg}\nAssistant: {bot_msg}\n"
+    prompt = f"{system_prompt}\n{chat_history}User: {message}\nAssistant:"
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        do_sample=True,
+        top_p=0.95,
+        eos_token_id=tokenizer.eos_token_id,
+    )
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Extract only the new response part after the prompt
+    response = generated_text[len(prompt):].strip()
+
+    # Append current Q/A to history
+    history.append((message, response))
+    return "", history
+
 demo = gr.ChatInterface(respond, type="messages")
 
 if __name__ == "__main__":
@@ -271,3 +317,4 @@ if __name__ == "__main__":
 
 
 
+
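A note on the new handler: with `gr.ChatInterface(respond, type="messages")`, Gradio passes `history` as a list of OpenAI-style `{"role": ..., "content": ...}` dicts and expects the chat function to return (or yield) only the assistant reply, so the tuple unpacking in `for user_msg, bot_msg in history:` and the final `return "", history` in the added code will break at runtime. Below is a minimal sketch of a messages-format handler, reusing the `tokenizer` and `model` globals defined in this commit (a 32B model in float16 is roughly 65 GB of weights, so `device_map="auto"` will offload layers to CPU on smaller machines):

def respond(message, history):
    system_prompt = (
        "You are a helpful coding assistant specialized in web development. "
        "Provide complete code snippets for HTML, CSS, JS, Flask, Node.js etc."
    )
    # With type="messages", each history entry is a {"role", "content"} dict.
    chat_history = ""
    for turn in history:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        chat_history += f"{speaker}: {turn['content']}\n"
    prompt = f"{system_prompt}\n{chat_history}User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return only the new completion; ChatInterface appends it to the history itself.
    return generated_text[len(prompt):].strip()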
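Separately, the hand-rolled "User:/Assistant:" prompt bypasses the chat template that Qwen2.5-Coder-Instruct was trained with. The standard transformers API `tokenizer.apply_chat_template` builds the expected prompt directly from role/content dicts; a sketch of that variant, under the same assumptions as above:

def respond(message, history):
    # Chat-template variant: feed role/content dicts straight to the tokenizer.
    messages = [{"role": "system", "content": "You are a helpful coding assistant "
                                              "specialized in web development."}]
    messages += history  # already role/content dicts when type="messages"
    messages.append({"role": "user", "content": message})
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant turn header
        return_tensors="pt",
    ).to(model.device)
    outputs = model.generate(
        input_ids,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )
    # Decode only the tokens generated after the prompt.
    return tokenizer.decode(outputs[0, input_ids.shape[-1]:], skip_special_tokens=True)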