Update app.py
app.py
CHANGED
@@ -29,7 +29,7 @@ from utils import (
 )
 
 # Initialize the model and tokenizer.
-api_token = os.getenv("
+api_token = os.getenv("HF_TOKEN")
 model_name = "meta-llama/Llama-3.1-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
 model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
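Context for this hunk: meta-llama/Llama-3.1-8B-Instruct is a gated repository, so from_pretrained needs a valid access token. A minimal sketch of the pattern the new line lands on, with an added fail-fast guard that is not in app.py:

import os

api_token = os.getenv("HF_TOKEN")
if api_token is None:
    # Assumption: failing early beats a confusing 401 mid-download.
    raise RuntimeError("Set HF_TOKEN to a token that has accepted the Llama 3.1 license.")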
@@ -456,7 +456,9 @@ def run_naive_rag_query(collection_name, query, rag_token_size, prefix, task, fe
 def prepare_compression_and_rag(combined_text, retrieval_slider_value, global_local_value, task_description, few_shot, state, progress=gr.Progress()):
     progress(0, desc="Starting compression process")
 
-    percentage = int(global_local_value.replace('%', ''))
+    # percentage = int(global_local_value.replace('%', ''))
+    percentage = 0 if global_local_value == "RAG" else 100
+
     progress(0.1, desc="Tokenizing text and preparing task")
     question_text = task_description + "\n" + few_shot
     context_encoding = tokenizer(combined_text, return_tensors="pt").to(device)
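The UI no longer exposes percentage strings, so the old replace('%', '') parser would break on the new values; the hunk swaps it for a binary mapping. A quick illustration of how the new expression behaves (names match the diff):

for global_local_value in ("RAG", "KVCompress"):
    percentage = 0 if global_local_value == "RAG" else 100
    print(global_local_value, "->", percentage)  # RAG -> 0, KVCompress -> 100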
@@ -538,6 +540,7 @@ def chat_response_stream(message: str, history: list, state: dict, compression_d
     percentage = state["global_local"]
     rag_retrieval_size = int(retrieval_slider_value * (1.0 - (percentage / 100)))
     print("RAG retrieval size: ", rag_retrieval_size)
+    print("Compressed cache: ", compressed_length)
     if percentage == 0:
         rag_prefix = prefix
         rag_task = state["task_description"]
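The added print logs the compressed-cache length next to the RAG retrieval size; compressed_length is assumed to be computed earlier in chat_response_stream. Note that with the two-mode mapping, rag_retrieval_size collapses to all-or-nothing, as a worked example with a hypothetical budget shows:

retrieval_slider_value = 4096  # hypothetical token budget
for percentage in (0, 100):
    rag_retrieval_size = int(retrieval_slider_value * (1.0 - (percentage / 100)))
    print(percentage, "->", rag_retrieval_size)  # 0 -> 4096 (all RAG), 100 -> 0 (all KV)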
@@ -583,7 +586,9 @@ def chat_response_stream(message: str, history: list, state: dict, compression_d
 
 def update_token_breakdown(token_count, retrieval_slider, global_local_value):
     retrieval_context_length = int(token_count / retrieval_slider)
-    percentage = int(global_local_value.replace('%', ''))
+    # percentage = int(global_local_value.replace('%', ''))
+    percentage = 0 if global_local_value == "RAG" else 100
+
     rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
     kv_tokens = retrieval_context_length - rag_tokens
     return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)"
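Same parser swap as in prepare_compression_and_rag. Tracing update_token_breakdown with hypothetical inputs makes the split explicit:

token_count, retrieval_slider = 65536, 2  # hypothetical document size and compression rate
retrieval_context_length = int(token_count / retrieval_slider)            # 32768
percentage = 100                                                          # "KVCompress" selected
rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))   # 0
kv_tokens = retrieval_context_length - rag_tokens                         # 32768
# -> "Token Breakdown: 32768 tokens (KV compression), 0 tokens (RAG retrieval)"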
@@ -592,36 +597,51 @@ def update_token_breakdown(token_count, retrieval_slider, global_local_value):
 # Gradio Interface
 ##########################################################################
 CSS = """
-body {
-
+.main-container {
+    display: flex;
+    align-items: stretch;
+}
+
+.upload-section, .chatbot-container {
+    display: flex;
+    flex-direction: column;
+    height: 100%;
+    overflow-y: auto;
 }
+
 .upload-section {
     padding: 10px;
     border: 2px dashed #ccc;
     border-radius: 10px;
 }
+
 .upload-button {
     background: #34c759 !important;
     color: white !important;
     border-radius: 25px !important;
 }
+
 .chatbot-container {
-    margin-top:
+    margin-top: 0;
 }
+
 .status-output {
     margin-top: 10px;
     font-size: 14px;
 }
+
 .processing-info {
     margin-top: 5px;
     font-size: 12px;
     color: #666;
 }
+
 .info-container {
     margin-top: 10px;
     padding: 10px;
     border-radius: 5px;
 }
+
 .file-list {
     margin-top: 0;
     max-height: 200px;
@@ -630,12 +650,14 @@ body {
     border: 1px solid #eee;
     border-radius: 5px;
 }
+
 .stats-box {
     margin-top: 10px;
     padding: 10px;
     border-radius: 5px;
     font-size: 12px;
 }
+
 .submit-btn {
     background: #1a73e8 !important;
     color: white !important;
@@ -644,18 +666,18 @@ body {
     padding: 5px 10px;
     font-size: 16px;
 }
+
 .input-row {
     display: flex;
     align-items: center;
 }
-
 """
 def reset_chat_state():
     return gr.update(value="Document not compressed yet. Please compress the document to enable chat."), False
 
-with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
-    gr.HTML("<h1><center>Beyond RAG with LLama 3.1-8B-Instruct Model</center></h1>")
-    gr.HTML("<center
+with gr.Blocks(css=CSS, theme=gr.themes.Soft(font=["Arial", gr.themes.GoogleFont("Inconsolata"), "sans-serif"])) as demo:
+    # gr.HTML("<h1><center>Beyond RAG with LLama 3.1-8B-Instruct Model</center></h1>")
+    gr.HTML("<h1><center>Beyond RAG: Compress your document and chat with it.</center></h1>")
 
     # Define chat_status_text as a Textbox with a set elem_id for custom styling.
     chat_status_text = gr.Textbox(value="Document not compressed yet. Please compress the document to enable chat.", interactive=False, show_label=False, render=False, lines=5)
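Besides the CSS spacing, this hunk retitles the page and passes a font stack into the Soft theme. The theme change in isolation, as a minimal sketch:

import gradio as gr

theme = gr.themes.Soft(font=["Arial", gr.themes.GoogleFont("Inconsolata"), "sans-serif"])
with gr.Blocks(theme=theme) as demo:
    gr.HTML("<h1><center>Beyond RAG: Compress your document and chat with it.</center></h1>")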
@@ -666,13 +688,13 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
 
     with gr.Row(elem_classes="main-container"):
         with gr.Column(elem_classes="upload-section"):
-            gr.Markdown("
+            gr.Markdown("### Document Preprocessing")
             with gr.Row():
-                file_input = gr.File(label="Drop file here or upload", file_count="multiple", elem_id="file-upload-area")
-                url_input = gr.Textbox(label="or enter a URL", placeholder="https://example.com/document.pdf")
+                file_input = gr.File(label="Drop file here or upload", file_count="multiple", elem_id="file-upload-area", height=120)
+                url_input = gr.Textbox(label="or enter a URL", placeholder="https://example.com/document.pdf", lines=2)
             with gr.Row():
-                do_ocr = gr.Checkbox(label="Do OCR", value=False)
-                do_table = gr.Checkbox(label="
+                do_ocr = gr.Checkbox(label="Do OCR on Images", value=True, visible=False)
+                do_table = gr.Checkbox(label="Parse Tables", value=True, visible=False)
             with gr.Accordion("Prompt Designer", open=False):
                 task_description_input = gr.Textbox(label="Task Description", value=default_task_description, lines=3, elem_id="task-description")
                 few_shot_input = gr.Textbox(label="Few-Shot Examples", value=default_few_shot, lines=10, elem_id="few-shot")
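Note the behavioral effect here: value=True combined with visible=False hard-enables OCR and table parsing while removing the controls from the UI, since a hidden component's value still flows to event handlers. Minimal illustration:

import gradio as gr

do_ocr = gr.Checkbox(label="Do OCR on Images", value=True, visible=False)
print(do_ocr.value)  # True, the default still reaches the callbacks that consume it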
@@ -682,9 +704,15 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
             retrieval_slider = gr.Slider(label="Select Compression Rate", minimum=1, maximum=32, step=1, value=2)
             retrieval_info_text = gr.Markdown("Number of tokens after compression: ")
             tokens_breakdown_text = gr.Markdown("Token breakdown will appear here.")
-            global_local_slider = gr.Radio(label="Hybrid Retrieval (0 is all RAG, 100 is all global)",
-                                           choices=["0%", "25%", "50%", "75%", "100%"], value="100%")
-
+            # global_local_slider = gr.Radio(label="Hybrid Retrieval (0 is all RAG, 100 is all global)",
+            #                                choices=["0%", "25%", "50%", "75%", "100%"], value="100%")
+            global_local_slider = gr.Radio(
+                label="Retrieval Mode",
+                choices=["RAG", "KVCompress"],
+                value="KVCompress"
+            )
+
+            compress_button = gr.Button("Compress Document", interactive=False, size="md", elem_classes="upload-button")
 
     # File input: Run auto_convert then chain reset_chat_state.
     file_input.change(
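The radio's string value is exactly what prepare_compression_and_rag and update_token_breakdown receive as global_local_value, so the choices list and the 0/100 mapping have to stay in sync. One hedged way to keep them in a single place (MODE_TO_PERCENTAGE is a hypothetical helper, not in app.py):

import gradio as gr

MODE_TO_PERCENTAGE = {"RAG": 0, "KVCompress": 100}  # hypothetical, not in app.py

global_local_slider = gr.Radio(
    label="Retrieval Mode",
    choices=list(MODE_TO_PERCENTAGE),
    value="KVCompress",
)
# downstream: percentage = MODE_TO_PERCENTAGE[global_local_value]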
@@ -785,7 +813,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
         )
         with gr.Column(elem_classes="chatbot-container"):
             chat_status_text.render()
-            gr.Markdown("## Chat")
+            gr.Markdown("## Chat (LLama 3.1-8B-Instruct)")
             chat_interface = gr.ChatInterface(
                 fn=chat_response_stream,
                 additional_inputs=[compressed_doc_state, compression_done],
@@ -793,5 +821,4 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                 fill_height=True
             )
 
-demo.queue(max_size=16).launch()
-
+demo.queue(max_size=16).launch()
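The entry point itself is unchanged apart from the trailing blank line. queue(max_size=16) caps how many requests may wait in line, which matters once compression jobs take long enough to stack up; the chained call is equivalent to the explicit form:

demo.queue(max_size=16)  # new requests are rejected once 16 are already waiting
demo.launch()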