nafisneehal committed
Commit 4752301 · verified · 1 Parent(s): a6831cd

Update app.py

Files changed (1)
  1. app.py +10 -12
app.py CHANGED
@@ -23,29 +23,27 @@ print(f"Low memory mode: {LOW_MEMORY}")
 load_in_4bit = True  # Use 4-bit quantization if memory is constrained
 
 # Load model and tokenizer with device mapping
-# Replace with the name of your trained model
 model_name = "nafisneehal/chandler_bot"
 model = AutoPeftModelForCausalLM.from_pretrained(
     model_name,
-    load_in_4bit=load_in_4bit,
-    device_map="auto" if device == "cuda" else None  # Automatic GPU mapping
+    load_in_4bit=load_in_4bit
 )
 model.to(device)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# Define prompt structure (update if necessary for your model)
-alpaca_prompt = "{instruction} {input} {output}"
+# Define prompt structure (update as needed for your model)
+alpaca_prompt = "{instruction} {input_text} {output}"
 
 @spaces.GPU  # Use GPU provided by Hugging Face Spaces if available
 def generate_response(user_input, chat_history):
-    instruction = "Chat with me like Chandler talks."
+    instruction = "Chat with me like Chandler talks."
     input_text = user_input  # Treats user input as the input
 
+    # Format the input using the prompt template
+    formatted_input = alpaca_prompt.format(instruction=instruction, input_text=input_text, output="")
+
     # Prepare inputs for model inference on the correct device
-    inputs = tokenizer(
-        [alpaca_prompt.format(instruction, input_text, "")],
-        return_tensors="pt"
-    ).to(device)  # Ensure tensors are on the correct device
+    inputs = tokenizer([formatted_input], return_tensors="pt").to(device)
 
     # Generate response on GPU or CPU as appropriate
     with torch.no_grad():
@@ -63,7 +61,7 @@ def generate_response(user_input, chat_history):
 
 # Set up Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Llama-Based Chatbot on GPU")
+    gr.Markdown("# Chandler-Like Chatbot on GPU")
 
     chat_history = gr.Chatbot(label="Chat History")
     user_input = gr.Textbox(
@@ -76,4 +74,4 @@ with gr.Blocks() as demo:
     submit_btn.click(generate_response, [user_input, chat_history], [
         chat_history, user_input])
 
-demo.launch()
+demo.launch(share=True)  # Enables a public link
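Two caveats on the 4-bit loading path, noted here as observations rather than as part of the commit. First, recent transformers releases deprecate passing load_in_4bit directly to from_pretrained in favor of a BitsAndBytesConfig object. Second, calling model.to(device) on a bitsandbytes-quantized model is typically rejected, since placement is handled by device_map (which this commit removes). A minimal sketch of the newer loading style, assuming a GPU environment with bitsandbytes installed:

import torch
from transformers import AutoTokenizer, BitsAndBytesConfig
from peft import AutoPeftModelForCausalLM

model_name = "nafisneehal/chandler_bot"

# Quantization settings live in a config object rather than a bare kwarg
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption: fp16 compute is acceptable
)

model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # accelerate places the quantized weights; no model.to(device)
)
tokenizer = AutoTokenizer.from_pretrained(model_name)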
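The prompt-template change fixes a real bug, not just naming: the old code filled named placeholders with positional arguments, and str.format raises KeyError when a named field has no matching keyword. A short illustration using the new template:

template = "{instruction} {input_text} {output}"

# Old style: positional arguments cannot fill named fields
# template.format("Chat with me like Chandler talks.", "Hi!", "")  # KeyError: 'instruction'

# New style: keyword arguments match the placeholders by name
prompt = template.format(instruction="Chat with me like Chandler talks.",
                         input_text="Hi!", output="")
print(prompt)  # "Chat with me like Chandler talks. Hi! "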
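The body of generate_response below "with torch.no_grad():" (old lines 52-62) is collapsed in this diff, so the actual generation call is not visible. For orientation only, a hypothetical sketch of the usual pattern at that point; max_new_tokens and the decoding details are assumptions, not the commit's code:

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=128)  # hypothetical settings

# Decode only the newly generated tokens so the echoed prompt is dropped
prompt_len = inputs["input_ids"].shape[1]
response = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)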
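Finally, on demo.launch(share=True): the flag asks Gradio for a temporary public *.gradio.live tunnel, which matters when running locally. Inside a Hugging Face Space the app is already served publicly, and recent Gradio versions generally ignore share=True there with a warning, so the change is harmless but likely a no-op in this deployment.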