asv7j committed on
Commit
92b7d80
·
verified ·
1 Parent(s): 60f62d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -1
app.py CHANGED
@@ -20,12 +20,22 @@ model1 = AutoModelForCausalLM.from_pretrained(
20
  torch_dtype="auto",
21
  device_map="auto"
22
  )
23
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
24
  time4 = time.time()
25
  print(time4-time3)
26
  app = FastAPI()
27
  time5 = time.time()
28
  print(time5-time4)
 
 
 
 
 
 
 
 
 
 
29
  @app.get("/")
30
  async def read_root():
31
  return {"Hello": "World!"}
@@ -130,3 +140,37 @@ async def read_droot():
130
  print(time_taken)
131
  return {"Hello": "World!"}
132
  #return {response: time}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  torch_dtype="auto",
21
  device_map="auto"
22
  )
23
+ tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
24
  time4 = time.time()
25
  print(time4-time3)
26
  app = FastAPI()
27
  time5 = time.time()
28
  print(time5-time4)
29
+
30
+
31
+ tokenizer2 = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
32
+ model2 = AutoModelForCausalLM.from_pretrained(
33
+ "google/gemma-2-2b-it",
34
+ device_map="auto",
35
+ torch_dtype=torch.bfloat16,
36
+ )
37
+
38
+
39
  @app.get("/")
40
  async def read_root():
41
  return {"Hello": "World!"}
 
140
  print(time_taken)
141
  return {"Hello": "World!"}
142
  #return {response: time}
143
+
144
+
145
@app.get("/tet")
async def read_droot():
    """Run a fixed demo conversation through ``model2`` and return its reply.

    The prompt is built with ``tokenizer2`` (the tokenizer loaded for
    ``model2``), the continuation is generated with at most 64 new tokens,
    and only the newly generated tokens are decoded.

    Returns:
        A dict ``{"Hello": <reply text>}``.

    Side effects:
        Prints the reply and the elapsed wall-clock seconds to stdout.
    """
    # NOTE(review): the diff hunk header suggests another handler in this file
    # is also named ``read_droot``; routing still works because the decorator
    # registers each function, but a distinct name would be clearer - confirm.
    start_time = time.perf_counter()

    system_prompt = "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."
    # Gemma's chat template raises on a "system" role, so the system
    # instructions are folded into the first user turn instead.
    messages = [
        {"role": "user", "content": system_prompt + "\n\n" + "I'm Alok. Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": "How are you?"},
    ]

    # Tokenize directly from the template: rendering to text and tokenizing
    # again would prepend a second BOS token. return_dict=True also yields the
    # attention mask that generate() expects.
    model_inputs = tokenizer2.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model2.device)  # follow wherever device_map="auto" placed the model

    output_ids = model2.generate(**model_inputs, max_new_tokens=64)

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = model_inputs["input_ids"].shape[1]
    new_token_ids = output_ids[:, prompt_length:]

    # Decode with the tokenizer that matches the generating model; decoding
    # these ids with another model's vocabulary produces meaningless text.
    response = tokenizer2.batch_decode(new_token_ids, skip_special_tokens=True)[0]
    print(response)

    time_taken = time.perf_counter() - start_time
    print(time_taken)
    return {"Hello": response}