Spaces:
Sleeping
Sleeping
update
Browse files
utils.py
CHANGED
@@ -7,6 +7,9 @@ import io
|
|
7 |
from transformers import DynamicCache
|
8 |
import os
|
9 |
import spaces
|
|
|
|
|
|
|
10 |
|
11 |
os.makedirs("tmp", exist_ok=True)
|
12 |
|
@@ -54,7 +57,7 @@ def generate_answer(
|
|
54 |
)
|
55 |
new_id = outputs.logits[0, -1].argmax()
|
56 |
generated_ids.append(new_id)
|
57 |
-
if new_id.item()
|
58 |
break
|
59 |
|
60 |
answer = tokenizer.decode(torch.stack(generated_ids), skip_special_tokens=True)
|
@@ -84,6 +87,8 @@ def get_condense_kv_cache(context: str):
|
|
84 |
response = requests.post(url, json=payload, headers=headers).json()
|
85 |
print(response)
|
86 |
numpy_kv_cache, error = load_npy_from_url(response["compressed_kv_url"])
|
|
|
|
|
87 |
kv_cache = DynamicCache.from_legacy_cache(
|
88 |
torch.from_numpy(numpy_kv_cache).to("cuda").to(torch.bfloat16)
|
89 |
)
|
|
|
7 |
from transformers import DynamicCache
|
8 |
import os
|
9 |
import spaces
|
10 |
+
import httpx
|
11 |
+
import tqdm
|
12 |
+
|
13 |
|
14 |
os.makedirs("tmp", exist_ok=True)
|
15 |
|
|
|
57 |
)
|
58 |
new_id = outputs.logits[0, -1].argmax()
|
59 |
generated_ids.append(new_id)
|
60 |
+
if new_id.item() == model.generation_config.eos_token_id:
|
61 |
break
|
62 |
|
63 |
answer = tokenizer.decode(torch.stack(generated_ids), skip_special_tokens=True)
|
|
|
87 |
response = requests.post(url, json=payload, headers=headers).json()
|
88 |
print(response)
|
89 |
numpy_kv_cache, error = load_npy_from_url(response["compressed_kv_url"])
|
90 |
+
if error:
|
91 |
+
print(error)
|
92 |
kv_cache = DynamicCache.from_legacy_cache(
|
93 |
torch.from_numpy(numpy_kv_cache).to("cuda").to(torch.bfloat16)
|
94 |
)
|