update
app.py CHANGED
@@ -22,7 +22,7 @@ def get_model_kv_cache(context_ids):
     kv_cache = DynamicCache.from_legacy_cache(
         past_key_values
     )
-    return
+    return kv_cache
 
 @spaces.GPU
 def inference(question: str, doc_path: str, use_turbo=True) -> str:
@@ -43,6 +43,7 @@ def inference(question: str, doc_path: str, use_turbo=True) -> str:
         print("turbo-mode-off")
         kv_cache = get_model_kv_cache(context_ids)
 
+        print("kv-length", kv_cache.get_seq_length())
 
     answer = generate_answer(MODEL, TOKENIZER, prompt_ids, kv_cache, context_length, 128)
     print(answer)
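
Without the fix in the first hunk, get_model_kv_cache builds the DynamicCache but implicitly returns None, so the kv_cache handed to generate_answer would be None; the added print then makes the cache length visible as a sanity check. Below is a minimal, self-contained sketch of how the fixed helper could be exercised. The "gpt2" checkpoint, the MODEL/TOKENIZER globals, and the sample context string are assumptions for illustration only; just the DynamicCache.from_legacy_cache wrapping, the returned kv_cache, and the kv-length print come from the diff above.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

# Hypothetical setup -- the Space defines its own MODEL and TOKENIZER elsewhere.
MODEL = AutoModelForCausalLM.from_pretrained("gpt2")
TOKENIZER = AutoTokenizer.from_pretrained("gpt2")

def get_model_kv_cache(context_ids: torch.Tensor) -> DynamicCache:
    # Prefill the model on the document context to populate its KV cache.
    with torch.no_grad():
        past_key_values = MODEL(context_ids, use_cache=True).past_key_values
    # Wrap the cache in a DynamicCache as the diff does, and actually return it
    # (the missing return value is what the first hunk fixes).
    # On recent transformers versions past_key_values may already be a Cache
    # object; the legacy-wrapping call here simply mirrors the diff.
    kv_cache = DynamicCache.from_legacy_cache(past_key_values)
    return kv_cache

# Prefill once on the context, then reuse the cache when generating the answer.
context_ids = TOKENIZER("some long document text ...", return_tensors="pt").input_ids
kv_cache = get_model_kv_cache(context_ids)
print("kv-length", kv_cache.get_seq_length())  # the sanity check the second hunk adds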