Update app.py
app.py CHANGED
@@ -23,29 +23,27 @@ print(f"Low memory mode: {LOW_MEMORY}")
 load_in_4bit = True  # Use 4-bit quantization if memory is constrained
 
 # Load model and tokenizer with device mapping
-# Replace with the name of your trained model
 model_name = "nafisneehal/chandler_bot"
 model = AutoPeftModelForCausalLM.from_pretrained(
     model_name,
-    load_in_4bit=load_in_4bit
-    device_map="auto" if device == "cuda" else None  # Automatic GPU mapping
+    load_in_4bit=load_in_4bit
 )
 model.to(device)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# Define prompt structure (update
-alpaca_prompt = "{instruction} {
+# Define prompt structure (update as needed for your model)
+alpaca_prompt = "{instruction} {input_text} {output}"
 
 @spaces.GPU  # Use GPU provided by Hugging Face Spaces if available
 def generate_response(user_input, chat_history):
-    instruction = "Chat with me like Chandler talks."
+    instruction = "Chat with me like Chandler talks."
     input_text = user_input  # Treats user input as the input
 
+    # Format the input using the prompt template
+    formatted_input = alpaca_prompt.format(instruction=instruction, input_text=input_text, output="")
+
     # Prepare inputs for model inference on the correct device
-    inputs = tokenizer(
-        [alpaca_prompt.format(instruction, input_text, "")],
-        return_tensors="pt"
-    ).to(device)  # Ensure tensors are on the correct device
+    inputs = tokenizer([formatted_input], return_tensors="pt").to(device)
 
     # Generate response on GPU or CPU as appropriate
     with torch.no_grad():
@@ -63,7 +61,7 @@ def generate_response(user_input, chat_history):
 
 # Set up Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown("# Chandler-Like Chatbot on GPU")
 
     chat_history = gr.Chatbot(label="Chat History")
     user_input = gr.Textbox(
@@ -76,4 +74,4 @@ with gr.Blocks() as demo:
     submit_btn.click(generate_response, [user_input, chat_history], [
         chat_history, user_input])
 
-demo.launch()
+demo.launch(share=True)  # Enables a public link