Yaswanth sai committed
Commit bb9e27e · 1 Parent(s): e0ab78e

changed the que

Files changed (1): app.py (+25 -37)
app.py CHANGED
```diff
@@ -17,7 +17,7 @@ base_model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
     device_map="auto",
-    torch_dtype=torch.float32  # Changed from float16 to float32 for better compatibility
+    torch_dtype=torch.float32
 )
 
 print("Loading fine-tuned model...")
```
```diff
@@ -32,40 +32,22 @@ def generate_response(task_description, code_snippet, request_type, mode="concise"):
     try:
         # Format the prompt based on request type
         if request_type == "hint":
-            prompt = f"""Task Description: {task_description}
-
-User's Code:
-{code_snippet}
-
-AI-HR Assistant: Here's a hint to help you:
-HINT:"""
+            prompt = f"Task: {task_description}\nCode:\n{code_snippet}\nHINT:"
         elif request_type == "feedback":
-            prompt = f"""Task Description: {task_description}
-
-User's Code:
-{code_snippet}
-
-AI-HR Assistant: Here's my feedback on your code:
-FEEDBACK:"""
+            prompt = f"Task: {task_description}\nCode:\n{code_snippet}\nFEEDBACK:"
         else: # follow-up
-            prompt = f"""Task Description: {task_description}
-
-User's Code:
-{code_snippet}
-
-AI-HR Assistant: Here's a follow-up question to extend your learning:
-FOLLOW-UP:"""
-
-        # Generate response
+            prompt = f"Task: {task_description}\nCode:\n{code_snippet}\nFOLLOW-UP:"
+
+        # Encode and generate
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=256 if mode == "detailed" else 128,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.95,
-            )
+
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=256 if mode == "detailed" else 128,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.95,
+        )
 
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
```
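Two behavioural notes on this hunk. First, the prompts collapse from multi-line templates to single-line f-strings that end in a marker (`HINT:`, `FEEDBACK:`, `FOLLOW-UP:`). Second, the explicit `torch.no_grad()` context is dropped; recent `transformers` releases already run `generate()` under no-grad internally, so this is safe there, though an explicit inference guard keeps the intent visible. A self-contained sketch of the resulting generation path (the helper name and the marker-stripping step are illustrative, not code from this commit):

```python
import torch

def generate_reply(model, tokenizer, prompt: str, marker: str, detailed: bool = False) -> str:
    # Hypothetical helper mirroring the new hunk; not part of the commit itself.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # generate() is already a no-grad path in recent transformers;
    # inference_mode() just makes that explicit.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256 if detailed else 128,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # decode() returns prompt + completion, so keep only the text after the marker.
    return text.split(marker, 1)[-1].strip()
```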
```diff
@@ -81,7 +63,7 @@ FOLLOW-UP:"""
     except Exception as e:
         return f"An error occurred: {str(e)}"
 
-# Create Gradio interface
+# Create Gradio interface with queuing enabled
 with gr.Blocks(title="Live Coding HR Assistant") as demo:
     gr.Markdown("# 💻 Live Coding HR Assistant")
     gr.Markdown("Get hints, feedback, and follow-up questions for your coding tasks!")
```
```diff
@@ -120,9 +102,15 @@ with gr.Blocks(title="Live Coding HR Assistant") as demo:
     submit_btn.click(
         fn=generate_response,
         inputs=[task_description, code_snippet, request_type, mode],
-        outputs=output
+        outputs=output,
+        api_name="predict",
+        queue=True, # Enable queueing
+        max_batch_size=1
     )
 
-# Launch the app
-if __name__ == "__main__":
-    demo.launch()
+demo.queue(max_size=10).launch(
+    server_name="0.0.0.0",
+    server_port=7860,
+    share=True,
+    enable_queue=True
+)
```
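Here `api_name="predict"` exposes a named API endpoint, `queue=True` routes the click event through the queue, and `demo.queue(max_size=10)` caps waiting requests; note that `max_batch_size` only takes effect together with `batch=True` on the event. Dropping the `if __name__ == "__main__":` guard is harmless on Spaces, where app.py is executed at import. One caveat: `enable_queue` is a legacy Gradio 3 argument to `launch()` that current releases no longer accept, so this line may need removing depending on the pinned Gradio version. With the endpoint named, the Space can be called programmatically; a sketch using `gradio_client` (the Space id and argument values are placeholders):

```python
from gradio_client import Client

client = Client("user-name/live-coding-hr-assistant")  # placeholder Space id
result = client.predict(
    "Reverse a singly linked list",   # task_description
    "def reverse(head):\n    ...",    # code_snippet
    "hint",                           # request_type
    "concise",                        # mode
    api_name="/predict",
)
print(result)
```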