import os
import sys
import random
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
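# Assumed environment: GROQ_API_KEY must be set for ChatGroq, with the gradio,
# langchain, langchain-community, langchain-huggingface, langchain-groq,
# sentence-transformers, faiss-cpu (or faiss-gpu), and pypdf packages installed.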
print(f"Pyton version {sys.version}.")
# Initialize the FAISS vector store
vector_store = None
# Sample PDF file
sample_filenames = ["Installation.pdf",
"User Guide.pdf",
]
desc = """
### This is a Demo of Retrieval-Augmented Generation (RAG)
**RAG** is an approach that combines retrieval with LLM generation to improve the accuracy and relevance of generated text.
It works by first retrieving relevant documents from an external knowledge source (like PDF files) and then using an LLM to produce responses based on both the input query and the retrieved content.
This method enhances factual correctness and allows the model to access up-to-date or domain-specific information without retraining.
Click the button below to load a **User Guide** and an **Installation Guide** for a smoke alarm device into the vector database. It could take a couple of minutes to process.
Once you see the message *"PDF(s) indexed successfully!"*, go to the **Chatbot** tab to ask any relevant questions about the device.
You can change the LLM model under **Additional Inputs** at the bottom of the **Chatbot** tab, in case a particular model has been retired. You can also adjust the LLM parameters there.
"""
chatbot_css = """
.gradio-container {
font-family: 'Inter', sans-serif;
border-radius: 12px;
overflow: hidden;
}
.panel {
border-radius: 8px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-button {
border-radius: 8px;
padding: 10px 20px;
font-weight: bold;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
transition: all 0.2s ease-in-out;
}
.gr-button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
}
.gr-textbox textarea {
border-radius: 8px;
}
.gr-slider {
padding: 10px 0;
}
.gr-tabitem {
padding: 20px;
}
"""
sample_button = "Load User Guide and Installation Guide documents"
examples_questions = [["How long is the lifespan of this smoke alarm?"],
["How often should I change the battery?"],
["Where should I install the smoke alarm in my home?"],
["How do I test if the smoke alarm is working?"],
["What should I do if the smoke alarm keeps beeping?"],
["Can this smoke alarm detect carbon monoxide too?"],
["How do I clean the smoke alarm properly?"],
["What type of battery does this smoke alarm use?"],
["How loud is the smoke alarm when it goes off?"],
["Can I install this smoke alarm on a wall instead of a ceiling?"],
]
template = \
"""Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know because there is no relevant information in the provided documents; don't try to make up an answer.
{context}
Question: {question}
Answer:
"""
# Function to handle PDF upload and indexing
def index_pdf(pdf):
global vector_store
# Load the PDF
loader = PyPDFLoader(pdf.name)
documents = loader.load()
# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
# Embed the chunks
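    # Note: bert-base-uncased works here, but it is not trained for sentence
    # similarity; a dedicated embedding model (e.g. sentence-transformers/all-MiniLM-L6-v2)
    # would likely retrieve more relevant chunks.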
embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
# Store the embeddings in the vector store
vector_store = FAISS.from_documents(texts, embeddings)
return "PDF(s) indexed successfully!"
def load_sample_pdf():
global vector_store
documents = []
# Load the PDFs
for file in sample_filenames:
loader = PyPDFLoader(file)
documents.extend(loader.load())
# print(f"{file} is processed!")
# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(documents)
# Embed the chunks
embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
# Store the embeddings in the vector store
vector_store = FAISS.from_documents(texts, embeddings)
return "PDF(s) indexed successfully!"
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
def generate_response(query, history, model, temperature, max_tokens, top_p, seed):
    if vector_store is None:
        return "Please upload and index a PDF at the Indexing tab.", ""
    if seed == 0:
        seed = random.randint(1, 100000)
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 16})
    # Forward the sampling parameters to Groq; top_p and seed are assumed to be
    # accepted by the Groq chat completions API and are passed via model_kwargs.
    llm = ChatGroq(
        groq_api_key=os.environ.get("GROQ_API_KEY"),
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        model_kwargs={"top_p": top_p, "seed": seed},
    )
    custom_rag_prompt = PromptTemplate.from_template(template)
    # Retrieve once and reuse the formatted context, so the "Retrieved Information"
    # panel shows exactly the context the LLM received.
    docs = retriever.invoke(query)
    relevant_info = format_docs(docs)
    rag_chain = (
        {"context": lambda _: relevant_info, "question": RunnablePassthrough()}
        | custom_rag_prompt
        | llm
        | StrOutputParser()
    )
    response = rag_chain.invoke(query)
    return response, relevant_info
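# Example call (hypothetical query; assumes the sample PDFs have been indexed
# and GROQ_API_KEY is set):
#   answer, sources = generate_response(
#       "How often should I change the battery?", [], "gemma2-9b-it", 0.5, 1024, 0.9, 0)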
# These controls were written for a gr.ChatInterface(additional_inputs=...) setup;
# the Blocks UI below builds its own equivalents, so this list is currently unused.
additional_inputs = [
gr.Dropdown(choices=["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it"], value="gemma2-9b-it", label="Model"),
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens the model may generate in a single response.<br>Context limits: 8k for gemma2-9b-it and llama3-8b/70b-8192, 32k for mixtral-8x7b, 128k for llama-3.1 and llama-3.3."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="Nucleus sampling: the model considers only the smallest set of most probable next tokens whose cumulative probability reaches p."),
    gr.Number(precision=0, value=0, label="Seed", info="A starting point for generation; use 0 for a random seed")
]
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Default(), css=chatbot_css) as demo:
with gr.Tab("Indexing"):
gr.Markdown(desc)
# pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
# pdf_input = gr.Textbox(label="PDF File")
# index_button = gr.Button("Index PDF")
# load_sample = gr.Button("Alternatively, Load and Index [Attention Is All You Need.pdf] as a Sample")
load_sample = gr.Button(sample_button)
index_output = gr.Textbox(label="Indexing Status")
# index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
load_sample.click(load_sample_pdf, inputs=None, outputs=index_output)
with gr.Tab("Chatbot"):
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot(
show_label=False,
show_share_button=False,
show_copy_button=True,
layout="panel",
height=500, # Set a fixed height for the chatbot
avatar_images=(
"https://placehold.co/60x60/FFD700/000000?text=U", # User avatar
"https://placehold.co/60x60/6366F1/FFFFFF?text=AI" # Bot avatar
)
)
msg = gr.Textbox(
label="Your Message",
placeholder="Type your message here...",
show_copy_button=True,
container=False # Prevent it from being wrapped in a default container
)
with gr.Row():
submit_btn = gr.Button("Send", variant="primary")
clear_btn = gr.ClearButton() # Will be configured below
                gr.Examples(
                    examples=examples_questions,
                    inputs=[msg],  # Clicking an example fills the message box
                    label="Quick Examples",
                    cache_examples=False,
                )
with gr.Column(scale=1):
                # LLM settings; these components are passed to generate_response by the event bindings below
                model_dropdown = gr.Dropdown(
                    choices=["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it"],
                    value="gemma2-9b-it", label="Model", interactive=True,
                )
temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.01, label="Temperature", interactive=True)
max_tokens_slider = gr.Slider(minimum=10, maximum=2000, value=500, step=10, label="Max Tokens", interactive=True)
top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.01, label="Top P", interactive=True)
seed_number = gr.Number(minimum=0, maximum=100000, value=0, step=1, label="Seed", precision=0, interactive=True)
gr.Markdown("### Retrieved Information")
# Textbox for relevant_info
relevant_info_textbox = gr.Textbox(
label="Retrieved Information",
interactive=False, # Not editable by the user
lines=20,
show_copy_button=True,
autoscroll=True,
container=True # Ensure it has a container for styling
)
# --- Event Handling ---
# This function acts as a wrapper to process inputs and distribute outputs
def process_chat_and_info(message, chat_history, model, temp, max_tok, top_p_val, seed_val):
# Call your generate_response function which returns two values
bot_message, retrieved_info = generate_response(
message, chat_history, model, temp, max_tok, top_p_val, seed_val
)
# Update the chat history for the chatbot component
chat_history.append((message, bot_message))
# Return values in the order of the outputs list
return chat_history, retrieved_info, "" # Clear the message input after sending
    # Bind `process_chat_and_info` to the submit event of the message textbox
    msg.submit(
        fn=process_chat_and_info,
        inputs=[msg, chatbot, model_dropdown, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
        outputs=[chatbot, relevant_info_textbox, msg],  # Order matters: chat history, retrieved info, then the cleared message box
        queue=False  # Set to True if you expect heavy load
    )
    # Bind `process_chat_and_info` to the click event of the send button
    submit_btn.click(
        fn=process_chat_and_info,
        inputs=[msg, chatbot, model_dropdown, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
        outputs=[chatbot, relevant_info_textbox, msg],  # Same order as above
        queue=False  # Set to True if you expect heavy load
    )
    # Configure the clear button to clear the message box, the chat history, and the retrieved-info panel
clear_btn.add([msg, chatbot, relevant_info_textbox])
# Launch the Gradio app
demo.launch(share=True)