asv7j committed on
Commit
2141949
·
verified ·
1 Parent(s): 318fbfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -31
app.py CHANGED
@@ -8,10 +8,6 @@ access_token = os.getenv("read_access")
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
  device = "cpu" # the device to load the model onto
10
 
11
- model = AutoModelForCausalLM.from_pretrained(
12
- "Qwen/Qwen2-0.5B-Instruct",
13
- device_map="auto"
14
- )
15
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
16
 
17
  model1 = AutoModelForCausalLM.from_pretrained(
@@ -31,33 +27,6 @@ app = FastAPI()
31
  @app.get("/")
32
  async def read_root():
33
  return {"Hello": "World!"}
34
-
35
- start_time = time.time()
36
- messages = [
37
- {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
38
- {"role": "user", "content": "I'm Alok. Who are you?"},
39
- {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
40
- {"role": "user", "content": "How are you?"}
41
- ]
42
- text = tokenizer.apply_chat_template(
43
- messages,
44
- tokenize=False,
45
- add_generation_prompt=True
46
- )
47
- model_inputs = tokenizer([text], return_tensors="pt").to(device)
48
- generated_ids = model.generate(
49
- model_inputs.input_ids,
50
- max_new_tokens=64
51
- )
52
- generated_ids = [
53
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
54
- ]
55
- response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
56
-
57
- end_time = time.time()
58
- time_taken = end_time - start_time
59
- print(time_taken)
60
-
61
 
62
  @app.get("/test")
63
  async def read_droot():
 
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
  device = "cpu" # the device to load the model onto
10
 
 
 
 
 
11
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
12
 
13
  model1 = AutoModelForCausalLM.from_pretrained(
 
27
  @app.get("/")
28
  async def read_root():
29
  return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  @app.get("/test")
32
  async def read_droot():