Update app.py
Browse files
app.py
CHANGED
@@ -29,7 +29,7 @@ from utils import (
|
|
29 |
)
|
30 |
|
31 |
# Initialize the model and tokenizer.
|
32 |
-
api_token = os.getenv("
|
33 |
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
34 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
35 |
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
@@ -591,7 +591,7 @@ def update_token_breakdown(token_count, retrieval_slider, global_local_value):
|
|
591 |
|
592 |
rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
|
593 |
kv_tokens = retrieval_context_length - rag_tokens
|
594 |
-
return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)"
|
595 |
|
596 |
##########################################################################
|
597 |
# Gradio Interface
|
@@ -797,12 +797,12 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(font=["Arial", gr.themes.GoogleFont
|
|
797 |
retrieval_slider.change(
|
798 |
fn=update_token_breakdown,
|
799 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
800 |
-
outputs=tokens_breakdown_text
|
801 |
)
|
802 |
global_local_slider.change(
|
803 |
fn=update_token_breakdown,
|
804 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
805 |
-
outputs=tokens_breakdown_text
|
806 |
)
|
807 |
|
808 |
# Compress button: Prepare compression and then update chat status.
|
|
|
29 |
)
|
30 |
|
31 |
# Initialize the model and tokenizer.
|
32 |
+
api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
|
33 |
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
34 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
35 |
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
|
|
591 |
|
592 |
rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
|
593 |
kv_tokens = retrieval_context_length - rag_tokens
|
594 |
+
return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)", f"Number of tokens after compression: {retrieval_context_length}"
|
595 |
|
596 |
##########################################################################
|
597 |
# Gradio Interface
|
|
|
797 |
retrieval_slider.change(
|
798 |
fn=update_token_breakdown,
|
799 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
800 |
+
outputs=[tokens_breakdown_text, retrieval_info_text]
|
801 |
)
|
802 |
global_local_slider.change(
|
803 |
fn=update_token_breakdown,
|
804 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
805 |
+
outputs=[tokens_breakdown_text, retrieval_info_text]
|
806 |
)
|
807 |
|
808 |
# Compress button: Prepare compression and then update chat status.
|