Keira James committed
Commit 5bb2577 · 1 Parent(s): 80e7abb

update app

Files changed (1)
  app.py +35 -6
app.py CHANGED
@@ -2,19 +2,48 @@ import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Load the tokenizer and model (change 'model_name' to your specific model)
-model_name = "gpt2" # Replace with your model
+# Define the model name
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+
+# Load the model and tokenizer
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
 
 # Function to generate a response
 def generate_response(prompt):
     if not prompt:
         return "Please enter a prompt."
+
+    # Create the messages for chat-based model
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+
+    # Format the input for the model
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    # Generate model response
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+
+    # Decode and return the response
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
 
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    output = model.generate(**inputs, max_new_tokens=512)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
     return response
 
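Only the model-loading and generation logic appears in this diff; the Streamlit front end that calls generate_response lives elsewhere in app.py. Below is a minimal sketch of how that wiring might look, assuming a simple text-area-plus-button layout; the widget labels and layout are illustrative assumptions, not part of this commit.

# Hypothetical Streamlit UI wiring (not part of this commit); assumes the
# generate_response function defined above is in scope in app.py.
import streamlit as st

st.title("Qwen2.5-1.5B-Instruct Chat")

# Collect the user's prompt.
prompt = st.text_area("Enter your prompt:")

# Generate and display the response on demand.
if st.button("Generate"):
    with st.spinner("Generating..."):
        answer = generate_response(prompt)
    st.write(answer)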