giulio98 committed · Commit 5928341 (verified) · 1 Parent(s): e8e31b3

Update app.py

Files changed (1):
  1. app.py +4 -4
app.py CHANGED

@@ -29,7 +29,7 @@ from utils import (
 )
 
 # Initialize the model and tokenizer.
-api_token = os.getenv("HF_TOKEN")
+api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
 model_name = "meta-llama/Llama-3.1-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
 model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
@@ -591,7 +591,7 @@ def update_token_breakdown(token_count, retrieval_slider, global_local_value):
 
     rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
     kv_tokens = retrieval_context_length - rag_tokens
-    return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)"
+    return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)", f"Number of tokens after compression: {retrieval_context_length}"
 
 ##########################################################################
 # Gradio Interface
@@ -797,12 +797,12 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(font=["Arial", gr.themes.GoogleFont
     retrieval_slider.change(
         fn=update_token_breakdown,
         inputs=[hidden_token_count, retrieval_slider, global_local_slider],
-        outputs=tokens_breakdown_text
+        outputs=[tokens_breakdown_text, retrieval_info_text]
     )
     global_local_slider.change(
         fn=update_token_breakdown,
         inputs=[hidden_token_count, retrieval_slider, global_local_slider],
-        outputs=tokens_breakdown_text
+        outputs=[tokens_breakdown_text, retrieval_info_text]
     )
 
     # Compress button: Prepare compression and then update chat status.
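For context, a minimal sketch of the Gradio wiring this change relies on: when an event handler returns a tuple, Gradio assigns each element, in order, to the corresponding component in the `outputs` list, which is why both `.change()` handlers now list `[tokens_breakdown_text, retrieval_info_text]`. The slider ranges, the `gr.State` placeholder, and the way `retrieval_context_length` and `percentage` are derived below are assumptions for illustration, not taken from this commit.

```python
import gradio as gr

def update_token_breakdown(token_count, retrieval_slider, global_local_value):
    # Hypothetical stand-ins: how retrieval_context_length and percentage are
    # computed from the sliders is not shown in this diff.
    retrieval_context_length = int(token_count / retrieval_slider)
    percentage = global_local_value
    rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
    kv_tokens = retrieval_context_length - rag_tokens
    # Two returned strings map, in order, to the two components in `outputs`.
    return (
        f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)",
        f"Number of tokens after compression: {retrieval_context_length}",
    )

with gr.Blocks() as demo:
    hidden_token_count = gr.State(value=8000)  # assumed placeholder token count
    retrieval_slider = gr.Slider(1, 32, value=4, label="Compression rate")
    global_local_slider = gr.Slider(0, 100, value=75, label="Global/local split (%)")
    tokens_breakdown_text = gr.Markdown()
    retrieval_info_text = gr.Markdown()

    # Both outputs must be listed so the second returned string is rendered.
    retrieval_slider.change(
        fn=update_token_breakdown,
        inputs=[hidden_token_count, retrieval_slider, global_local_slider],
        outputs=[tokens_breakdown_text, retrieval_info_text],
    )
    global_local_slider.change(
        fn=update_token_breakdown,
        inputs=[hidden_token_count, retrieval_slider, global_local_slider],
        outputs=[tokens_breakdown_text, retrieval_info_text],
    )

demo.launch()
```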