jchen8000 committed
Commit eed60c0 · verified · 1 Parent(s): 25b992d

Upload app.py

Files changed (1):
  1. app.py +194 -68
app.py CHANGED
@@ -13,30 +13,71 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 
 
-print(f"Pyton version {sys.version}.")
-
 # Initialize the FAISS vector store
 vector_store = None
 
 # Sample PDF file
-sample_filenames = ["Installation.pdf",
-                    "User Guide.pdf",
-                   ]
+sample_filenames = ["User Guide.pdf",
+                    "Installation.pdf",
+                   ]
 
 desc = """
-### This is a Demo of Retrieval-Augmented Generation (RAG)
-
-**RAG** is an approach that combines retrieval-based and generative LLM models to improve the accuracy and relevance of generated text.
+<h2 style="text-align: center; color: #333;">This is a Demo of RAG (Retrieval-Augmented Generation)</h2>
+<p style="text-align: left; color: #555;">
+<b>RAG</b> is an approach that combines retrieval-based and generative LLM models to improve the accuracy and relevance of generated text.
 It works by first retrieving relevant documents from an external knowledge source (like PDF files) and then using a LLM model to produce responses based on both the input query and the retrieved content.
 This method enhances factual correctness and allows the model to access up-to-date or domain-specific information without retraining.
-
-Click the button below to load a **User Guide** and an **Installation Guide** for a smoke alarm device into the vector database. It could take a couple of minutes to process.
-Once you see the message *"PDF(s) indexed successfully!"*, go to the **Chatbot** tab to ask any relevant questions about the device.
-
-You can change the LLM models in the **Additional Inputs** at the bottom of the **Chatbot** tab, in case of certain model is out of date. You can also adjust the LLM parameters there.
-
+</p>
+<hr/>
+"""
+
+desc_pdf_upload = """
+<p style="text-align: left; color: #555;">
+Choose the PDF files and click the <b>Load and Index Documents</b> button below to upload and index the files. It could take some time depending on the size of the files.
+Once you see the message <i>"PDF(s) indexed successfully!"</i> in the <b>Indexing Status</b> box below, go to the <b>Chatbot</b> tab to ask any relevant questions.
+</p>
+"""
+
+desc_sample = """
+<p style="text-align: left; color: #555;">
+Alternatively, click the button below to load a <b>User Guide</b> and an <b>Installation Guide</b> for a smoke alarm device into the vector database. It could take a couple of minutes to process.
+Once you see the message <i>"PDF(s) indexed successfully!"</i> in the <b>Indexing Status</b> box below, go to the <b>Chatbot</b> tab to ask any relevant questions about the device.
+</p>
+"""
+
+gui_css = """
+.gradio-container {
+    font-family: 'Inter', sans-serif;
+    border-radius: 12px;
+    overflow: hidden;
+}
+.panel {
+    border-radius: 8px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.gr-button {
+    border-radius: 8px;
+    padding: 10px 20px;
+    font-weight: bold;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    transition: all 0.2s ease-in-out;
+}
+.gr-button:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+}
+.gr-textbox textarea {
+    border-radius: 8px;
+}
+.gr-slider {
+    padding: 10px 0;
+}
+.gr-tabitem {
+    padding: 20px;
+}
 """
 
+
 sample_button = "Load User Guide and Installation Guide documents"
 
@@ -63,33 +104,15 @@ Question: {question}
 Answer:
 """
 
-# Function to handle PDF upload and indexing
-def index_pdf(pdf):
-    global vector_store
-
-    # Load the PDF
-    loader = PyPDFLoader(pdf.name)
-    documents = loader.load()
-
-    # Split the documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    texts = text_splitter.split_documents(documents)
 
-    # Embed the chunks
-    embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
-
-    # Store the embeddings in the vector store
-    vector_store = FAISS.from_documents(texts, embeddings)
-
-    return "PDF(s) indexed successfully!"
-
-def load_sample_pdf():
+# Function to handle PDF upload and indexing
+def load_pdf(files):
     global vector_store
     documents = []
 
     # Load the PDFs
-    for file in sample_filenames:
-        loader = PyPDFLoader(file)
+    for file in files:
+        loader = PyPDFLoader(file.name)
         documents.extend(loader.load())
         # print(f"{file} is processed!")
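For reference, the unchanged splitter configuration that load_pdf relies on is RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200). A minimal sketch of what those parameters produce; the import path and the synthetic input are illustrative assumptions, not taken from app.py:

    from langchain_text_splitters import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text("A" * 2500)  # stand-in for extracted PDF text
    print([len(c) for c in chunks])  # roughly [1000, 1000, 900]: consecutive chunks share ~200 characters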
 
@@ -98,7 +121,9 @@ def load_sample_pdf():
     texts = text_splitter.split_documents(documents)
 
     # Embed the chunks
-    embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
+    # embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    embedding_model_name = "bert-base-uncased"
+    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, encode_kwargs={"normalize_embeddings": True})
 
     # Store the embeddings in the vector store
     vector_store = FAISS.from_documents(texts, embeddings)
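The hunk above keeps bert-base-uncased as the embedding model and only leaves sentence-transformers/all-MiniLM-L6-v2 as a comment. BERT base is a masked-language model with no sentence-similarity training, so the commented-out MiniLM model is generally the stronger retrieval choice. A sketch of swapping it in, assuming the langchain-huggingface import path (older releases expose HuggingFaceEmbeddings under langchain_community.embeddings):

    from langchain_huggingface import HuggingFaceEmbeddings

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        encode_kwargs={"normalize_embeddings": True},  # keep normalized vectors, as in the diff
    )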
@@ -112,7 +137,7 @@ def format_docs(docs):
 def generate_response(query, history, model, temperature, max_tokens, top_p, seed):
 
     if vector_store is None:
-        return "Please upload and index a PDF at the Indexing tab."
+        return "Please upload and index a PDF at the Indexing tab.", ""
 
     if seed == 0:
         seed = random.randint(1, 100000)
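generate_response now returns a (response, relevant_info) pair even on the early return, matching the two outputs wired up in the Chatbot tab below. Its body is outside this diff; the following is a hypothetical sketch of the retrieval chain it presumably builds, reusing only names that do appear in app.py (template, format_docs, vector_store, StrOutputParser, RunnablePassthrough). The ChatGroq import, the k value, and the parameter mapping are assumptions:

    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough
    from langchain_groq import ChatGroq  # assumed; the diff only shows Groq-hosted model names

    def build_chain(model, temperature, max_tokens):
        retriever = vector_store.as_retriever(search_kwargs={"k": 4})  # k=4 is an assumption
        prompt = ChatPromptTemplate.from_template(template)
        llm = ChatGroq(model=model, temperature=temperature, max_tokens=max_tokens)
        return (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )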
@@ -135,46 +160,147 @@ def generate_response(query, history, model, temperature, max_tokens, top_p, see
 
     return response, relevant_info
 
-additional_inputs = [
-    gr.Dropdown(choices=["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it"], value="gemma2-9b-it", label="Model"),
-    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
-    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
-    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
-    gr.Number(precision=0, value=0, label="Seed", info="A starting point to initiate generation, use 0 for random")
-]
 
+template = """
+You are a helpful AI assistant. Use the following context to answer the question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+"""
+
 
-# Create the Gradio interface
-with gr.Blocks(theme=gr.themes.Default()) as demo:
+# --- Gradio Interface using gr.Blocks() ---
+with gr.Blocks(theme=gr.themes.Soft(), css=gui_css) as demo:
     with gr.Tab("Indexing"):
-        gr.Markdown(desc)
-        # pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-        # pdf_input = gr.Textbox(label="PDF File")
-        # index_button = gr.Button("Index PDF")
-        # load_sample = gr.Button("Alternatively, Load and Index [Attention Is All You Need.pdf] as a Sample")
-        load_sample = gr.Button(sample_button)
-        index_output = gr.Textbox(label="Indexing Status")
-        # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
-        load_sample.click(load_sample_pdf, inputs=None, outputs=index_output)
-
-    with gr.Tab("Chatbot"):
+        with gr.Row():
+            gr.Markdown(desc)
+
         with gr.Row():
             with gr.Column():
-                gr.ChatInterface(
-                    fn=generate_response,
-                    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
+                gr.Markdown(desc_pdf_upload)
+                pdf_files = gr.File(label="Upload PDF Document", file_types=[".pdf"], interactive=True, file_count="multiple")
+                load_button = gr.Button("Load and Index Documents", variant="secondary")
+
+            with gr.Column():
+                gr.Markdown(desc_sample)
+                sample_files = gr.File(
+                    label="Sample PDF Files",
+                    file_count="multiple",
+                    file_types=[".pdf"],
+                    value=sample_filenames,
+                    visible=True,
+                    interactive=False
+                )
+                sample_button = gr.Button(sample_button)
+
+        with gr.Row():
+            index_output = gr.Textbox(label="Indexing Status")
+        sample_button.click(load_pdf, inputs=sample_files, outputs=index_output)
+        load_button.click(load_pdf, inputs=pdf_files, outputs=index_output)
+
+    with gr.Tab("Chatbot"):
+        with gr.Row():
+            with gr.Column(scale=2):
+                # Chatbot component
+                chatbot = gr.Chatbot(
+                    show_label=False,
+                    show_share_button=False,
+                    show_copy_button=True,
+                    layout="panel",
+                    height=500,  # Set a fixed height for the chatbot
+                    avatar_images=(
+                        "https://placehold.co/60x60/FFD700/000000?text=U",  # User avatar
+                        "https://placehold.co/60x60/6366F1/FFFFFF?text=AI"  # Bot avatar
+                    )
+                )
+
+                # Message input textbox
+                msg = gr.Textbox(
+                    label="Your Message",
+                    placeholder="Type your message here...",
+                    show_copy_button=True,
+                    container=False  # Prevent it from being wrapped in a default container
+                )
+
+                with gr.Row():
+                    submit_btn = gr.Button("Send", variant="primary")
+                    clear_btn = gr.ClearButton()  # Will be configured below
+
+                gr.Examples(
                     examples=examples_questions,
-                    additional_inputs=additional_inputs,
+                    inputs=[msg],
+                    outputs=[msg],  # Update the message input with the example
+                    label="Quick Examples",
                     cache_examples=False,
                 )
-            # with gr.Column():
-            #     retrieve_button = gr.Button("Retrieve Relevant Info")
-            #     relevant_info = gr.Textbox(
-            #         label="Retrieved Information",
-            #         interactive=False,
-            #         lines=20,
-            #     )
 
-
-# Launch the Gradio app
-demo.launch(share=True)
+            with gr.Column(scale=1):
+                gr.Markdown("### LLM Settings")
+                model_name = gr.Dropdown(label="Model Name",
+                    choices=[
+                        "llama-3.3-70b-versatile",
+                        "llama-3.1-8b-instant",
+                        "llama3-70b-8192",
+                        "llama3-8b-8192",
+                        "mixtral-8x7b-32768",
+                        "gemma2-9b-it"
+                    ],
+                    value="llama-3.3-70b-versatile",
+                    interactive=True
+                )
+
+                temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.01, label="Temperature", interactive=True)
+                max_tokens_slider = gr.Slider(minimum=10, maximum=2000, value=500, step=10, label="Max Tokens", interactive=True)
+                top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.01, label="Top P", interactive=True)
+                seed_number = gr.Number(minimum=0, maximum=100000, value=0, step=1, label="Seed", precision=0, interactive=True)
+
+                gr.Markdown("### Retrieved Information")
+                # Textbox for relevant_info
+                relevant_info_textbox = gr.Textbox(
+                    label="Retrieved Information",
+                    interactive=False,  # Not editable by the user
+                    lines=20,
+                    show_copy_button=True,
+                    autoscroll=True,
+                    container=True  # Ensure it has a container for styling
+                )
 
+    # --- Event Handling ---
+    # This function acts as a wrapper to process inputs and distribute outputs
+    def process_chat_and_info(message, chat_history, model, temp, max_tok, top_p_val, seed_val):
+        # Call your generate_response function which returns two values
+        bot_message, retrieved_info = generate_response(
+            message, chat_history, model, temp, max_tok, top_p_val, seed_val
+        )
+
+        # Update the chat history for the chatbot component
+        chat_history.append((message, bot_message))
+
+        # Return values in the order of the outputs list
+        return chat_history, retrieved_info, ""  # Clear the message input after sending
+
+    # Bind the `process_chat_and_info` function to the submit event of the message textbox
+    msg.submit(
+        fn=process_chat_and_info,
+        inputs=[msg, chatbot, model_name, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
+        outputs=[chatbot, relevant_info_textbox, msg],  # Order matters here: chatbot, relevant_info, then msg
+        queue=False  # Set to True if you expect heavy load
+    )
+
+    # Bind the `process_chat_and_info` function to the click event of the send button
+    submit_btn.click(
+        fn=process_chat_and_info,
+        inputs=[msg, chatbot, model_name, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
+        outputs=[chatbot, relevant_info_textbox, msg],  # Order matters here
+        queue=False  # Set to True if you expect heavy load
+    )
+    # Configure the clear button to clear both the chatbot and the relevant_info_textbox
+    clear_btn.add([msg, chatbot, relevant_info_textbox])
+
+
+demo.launch(server_name="0.0.0.0", server_port=7860)
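For context on the event wiring at the end of the new file: msg.submit and submit_btn.click both route through process_chat_and_info, which appends a (user, bot) tuple to the history, pushes the retrieved context to the side textbox, and clears the input. A standalone sketch of the same pattern, assuming a Gradio version that still accepts tuple-format chat history (which chat_history.append((message, bot_message)) implies):

    import gradio as gr

    def respond(message, history):
        reply = f"Echo: {message}"        # stand-in for generate_response
        history.append((message, reply))  # tuple-format history, as in the diff
        return history, ""                # the empty string clears the textbox

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox(placeholder="Type your message here...")
        send = gr.Button("Send")
        msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
        send.click(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

    demo.launch()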