Spaces:

Bhaskar2611
/

Code_Generator_best

Running

App Files Files Community

Bhaskar2611 committed on May 20

Commit

e9d8fd0

verified ·

1 Parent(s): 06dd8c4

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -44

app.py CHANGED Viewed

@@ -210,57 +210,103 @@ For more information on `huggingface_hub` Inference API support, please check th
 # if __name__ == "__main__":
 #     demo.launch()
-import gradio as gr
-from huggingface_hub import InferenceClient
-hf_token = "HF_TOKEN"
-# Ensure token is available
-if hf_token is None:
-    raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in .env file or environment.")
-# Instantiate Hugging Face Inference Client with token
-client = InferenceClient(
-    model="Qwen/Qwen2.5-Coder-32B-Instruct",
-    token=hf_token
 )
-def respond(message, history: list[tuple[str, str]]):
-    system_message = (
-        "You are a helpful and experienced coding assistant specialized in web development. "
-        "Help the user by generating complete and functional code for building websites. "
-        "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
-        "based on their requirements."
     )
-    max_tokens = 2048
-    temperature = 0.7
-    top_p = 0.95
-    # Build conversation history
-    messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    # Stream the response from the model
-    for chunk in client.chat.completions.create(
-        model="Qwen/Qwen2.5-Coder-32B-Instruct",
-        messages=messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = chunk.choices[0].delta.content or ""
-        response += token
-        yield response
-# Gradio UI
 demo = gr.ChatInterface(respond, type="messages")
 if __name__ == "__main__":
@@ -271,3 +317,4 @@ if __name__ == "__main__":

 # if __name__ == "__main__":
 #     demo.launch()
+# import gradio as gr
+# from huggingface_hub import InferenceClient
+# hf_token = "HF_TOKEN"
+# # Ensure token is available
+# if hf_token is None:
+#     raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in .env file or environment.")
+# # Instantiate Hugging Face Inference Client with token
+# client = InferenceClient(
+#     model="Qwen/Qwen2.5-Coder-32B-Instruct",
+#     token=hf_token
+# )
+# def respond(message, history: list[tuple[str, str]]):
+#     system_message = (
+#         "You are a helpful and experienced coding assistant specialized in web development. "
+#         "Help the user by generating complete and functional code for building websites. "
+#         "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
+#         "based on their requirements."
+#     )
+#     max_tokens = 2048
+#     temperature = 0.7
+#     top_p = 0.95
+#     # Build conversation history
+#     messages = [{"role": "system", "content": system_message}]
+#     for user_msg, assistant_msg in history:
+#         if user_msg:
+#             messages.append({"role": "user", "content": user_msg})
+#         if assistant_msg:
+#             messages.append({"role": "assistant", "content": assistant_msg})
+#     messages.append({"role": "user", "content": message})
+#     response = ""
+#     # Stream the response from the model
+#     for chunk in client.chat.completions.create(
+#         model="Qwen/Qwen2.5-Coder-32B-Instruct",
+#         messages=messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = chunk.choices[0].delta.content or ""
+#         response += token
+#         yield response
+# # Gradio UI
+# demo = gr.ChatInterface(respond, type="messages")
+# if __name__ == "__main__":
+#     demo.launch()
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# Load the tokenizer and model once at import time so every call to respond() reuses them.
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+    device_map="auto",  # device placement of the weights is left to the library
+    torch_dtype=torch.float16,  # load the weights in half precision
 )
+def respond(message, history):
+    """Generate the assistant's reply to ``message`` for the Gradio chat UI.
+
+    Args:
+        message: The user's latest message text.
+        history: Earlier conversation turns. The interface is created with
+            ``type="messages"``, so this is normally a list of
+            ``{"role": ..., "content": ...}`` dicts; legacy
+            ``(user, assistant)`` pairs are accepted as well.
+
+    Returns:
+        The newly generated reply as a string. ``gr.ChatInterface`` appends
+        the reply to the conversation itself, so ``history`` is neither
+        mutated nor returned here.
+    """
+    system_prompt = (
+        "You are a helpful coding assistant specialized in web development. "
+        "Provide complete code snippets for HTML, CSS, JS, Flask, Node.js etc."
+    )
+    # Rebuild the conversation as role/content messages for the chat template.
+    messages = [{"role": "system", "content": system_prompt}]
+    for turn in history or []:
+        if isinstance(turn, dict):
+            role = turn.get("role")
+            content = turn.get("content")
+            # Skip non-text entries (e.g. attachments) and unknown roles.
+            if role in ("user", "assistant") and isinstance(content, str) and content:
+                messages.append({"role": role, "content": content})
+        else:
+            user_msg, bot_msg = turn
+            if user_msg:
+                messages.append({"role": "user", "content": user_msg})
+            if bot_msg:
+                messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})
+    # Format the prompt with the model's own chat template rather than an
+    # ad-hoc "User:/Assistant:" transcript, which an instruct model was not
+    # trained on.
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        do_sample=True,
+        top_p=0.95,
+        eos_token_id=tokenizer.eos_token_id,
+    )
+    # Decode only the tokens generated after the prompt. Slicing the decoded
+    # text by len(prompt) is unreliable because decoding does not always
+    # reproduce the prompt character for character.
+    prompt_length = inputs["input_ids"].shape[-1]
+    new_tokens = outputs[0][prompt_length:]
+    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
 demo = gr.ChatInterface(respond, type="messages")
 if __name__ == "__main__":