Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -20,12 +20,22 @@ model1 = AutoModelForCausalLM.from_pretrained(
|
|
20 |
torch_dtype="auto",
|
21 |
device_map="auto"
|
22 |
)
|
23 |
-
|
24 |
time4 = time.time()
|
25 |
print(time4-time3)
|
26 |
app = FastAPI()
|
27 |
time5 = time.time()
|
28 |
print(time5-time4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
@app.get("/")
|
30 |
async def read_root():
|
31 |
return {"Hello": "World!"}
|
@@ -130,3 +140,37 @@ async def read_droot():
|
|
130 |
print(time_taken)
|
131 |
return {"Hello": "World!"}
|
132 |
#return {response: time}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
torch_dtype="auto",
|
21 |
device_map="auto"
|
22 |
)
|
23 |
+
tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
|
24 |
time4 = time.time()
|
25 |
print(time4-time3)
|
26 |
app = FastAPI()
|
27 |
time5 = time.time()
|
28 |
print(time5-time4)
|
29 |
+
|
30 |
+
|
31 |
+
tokenizer2 = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
|
32 |
+
model2 = AutoModelForCausalLM.from_pretrained(
|
33 |
+
"google/gemma-2-2b-it",
|
34 |
+
device_map="auto",
|
35 |
+
torch_dtype=torch.bfloat16,
|
36 |
+
)
|
37 |
+
|
38 |
+
|
39 |
@app.get("/")
|
40 |
async def read_root():
|
41 |
return {"Hello": "World!"}
|
|
|
140 |
print(time_taken)
|
141 |
return {"Hello": "World!"}
|
142 |
#return {response: time}
|
143 |
+
|
144 |
+
|
145 |
+
@app.get("/tet")
async def read_droot():
    """Run a fixed demo conversation through the Gemma model and return its reply.

    The prompt is built, tokenized, generated and decoded with the same
    tokenizer/model pair (``tokenizer2`` / ``model2``) so that the token ids
    are always interpreted with the vocabulary that produced them.

    Returns:
        dict: ``{"Hello": <reply text>}`` where the reply text is decoded from
        the tokens generated after the prompt (at most 64 new tokens).
    """
    # NOTE(review): an earlier handler in this file appears to use the same
    # function name; the route still registers, but a unique name would avoid
    # shadowing -- confirm against the full file.
    # NOTE(review): ``generate`` is a blocking call, so this coroutine occupies
    # the event loop for the whole generation.
    started = time.perf_counter()

    # Gemma's chat template rejects a "system" role, so the persona
    # instructions are folded into the first user turn instead.
    persona = (
        "You are a helpful assistant, Sia, developed by Sushma. "
        "You will response in polity and brief."
    )
    messages = [
        {"role": "user", "content": persona + "\n\n" + "I'm Alok. Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": "How are you?"},
    ]

    # Tokenize straight from the template: rendering to text and re-tokenizing
    # would prepend a second BOS token. ``return_dict=True`` also yields the
    # attention mask that ``generate`` expects.
    model_inputs = tokenizer2.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model2.device)  # follow wherever device_map="auto" placed the model

    generated_ids = model2.generate(**model_inputs, max_new_tokens=64)

    # Drop the echoed prompt so that only the newly generated tokens are decoded.
    prompt_length = model_inputs["input_ids"].shape[1]
    completion_ids = generated_ids[:, prompt_length:]

    response = tokenizer2.batch_decode(completion_ids, skip_special_tokens=True)[0]
    print(response)

    time_taken = time.perf_counter() - started
    print(time_taken)
    return {"Hello": response}
|