toilaluan committed on
Commit
37fc80f
·
1 Parent(s): 18501f6
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -22,7 +22,7 @@ def get_model_kv_cache(context_ids):
22
  kv_cache = DynamicCache.from_legacy_cache(
23
  past_key_values
24
  )
25
- return past_key_values
26
 
27
  @spaces.GPU
28
  def inference(question: str, doc_path: str, use_turbo=True) -> str:
@@ -43,6 +43,7 @@ def inference(question: str, doc_path: str, use_turbo=True) -> str:
43
  print("turbo-mode-off")
44
  kv_cache = get_model_kv_cache(context_ids)
45
 
 
46
 
47
  answer = generate_answer(MODEL, TOKENIZER, prompt_ids, kv_cache, context_length, 128)
48
  print(answer)
 
22
  kv_cache = DynamicCache.from_legacy_cache(
23
  past_key_values
24
  )
25
+ return kv_cache
26
 
27
  @spaces.GPU
28
  def inference(question: str, doc_path: str, use_turbo=True) -> str:
 
43
  print("turbo-mode-off")
44
  kv_cache = get_model_kv_cache(context_ids)
45
 
46
+ print("kv-length", kv_cache.get_seq_length())
47
 
48
  answer = generate_answer(MODEL, TOKENIZER, prompt_ids, kv_cache, context_length, 128)
49
  print(answer)