# RAG Document Q&A — Gradio app backed by LangChain, FAISS, and Groq.
import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
"""
### How to Use the App:
1. **Upload PDF:** Upload a PDF and click 'Process PDF' to add it to the knowledge base.
2. **Ask a Question:** Switch to the 'Q&A System' tab, enter your question, and click 'Ask Question' to get an answer based on the uploaded PDF content.
3. **Clear Data:** Click 'Clear Knowledge Base' to reset and remove all uploaded documents.
Ensure a PDF is uploaded before asking questions.
"""
# Load environment variables from a local .env file (expects GROQ_API_KEY).
load_dotenv()
# Load the GROQ API key; may be None if unset — ChatGroq will then fail at call time.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Set up the language model (deterministic output via temperature=0).
llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=GROQ_API_KEY)
# Define the prompt template.
# NOTE(review): this template is never used below — process_question builds its
# own f-string prompt instead; confirm whether this was meant to be wired in.
prompt = ChatPromptTemplate.from_template("""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question.
<context>{context}</context>
Question: {input}
""")
# Set up embeddings model (local sentence-transformers MiniLM).
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Global FAISS vector store; None until the first PDF is processed.
vectors = None
# Function to process PDF files
def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and index it in FAISS.

    Creates the global vector store on first use; on later calls the new
    chunks are appended to the existing index.

    Args:
        file: Gradio file object with a ``.name`` path, or None.

    Returns:
        A status message string for the upload textbox.
    """
    global vectors
    # Guard clause: nothing to do without an upload.
    if file is None:
        return "No file uploaded."
    pages = PyPDFLoader(file.name).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)
    if vectors is None:
        # First document: build a fresh index.
        vectors = FAISS.from_documents(chunks, embeddings)
    else:
        # Subsequent documents: extend the existing index.
        vectors.add_documents(chunks)
    return "PDF processed and added to the knowledge base."
# Function to clear the knowledge base
def clear_knowledge_base():
global vectors
vectors = None # Reset the vector store
return "Knowledge base cleared."
def process_question(question):
    """Answer a question using only the indexed PDF content.

    Retrieves the top-5 most similar chunks from the FAISS store, asks the
    LLM to answer from that context, and reports a confidence score.

    Args:
        question: The user's question string.

    Returns:
        tuple: (answer text, concatenated context, confidence score in [0, 1]).
    """
    global vectors
    if vectors is None:
        return "Please upload a PDF first.", "", 0
    # Retrieve chunks together with their distances. The previous
    # retriever.invoke() path discarded scores, so metadata['score'] was
    # never populated and the confidence was always 0.
    scored_docs = vectors.similarity_search_with_score(question, k=5)
    if not scored_docs:
        return "No relevant context found.", "", 0
    documents = [doc for doc, _ in scored_docs]
    context = "\n\n".join(doc.page_content for doc in documents)
    # Named `query` (not `prompt`) to avoid shadowing the module-level
    # ChatPromptTemplate of the same name.
    query = f"Answer the question based on the provided context.\n\nContext: {context}\n\nQuestion: {question}"
    response = llm.invoke(query)
    # FAISS returns L2 distances (lower = closer); map each to (0, 1] so a
    # higher number means a better match, then average.
    confidence_score = sum(1.0 / (1.0 + score) for _, score in scored_docs) / len(scored_docs)
    # ChatGroq returns an AIMessage; the Textbox needs its text content,
    # not the repr of the whole message object.
    answer = getattr(response, "content", response)
    return answer, context, round(confidence_score, 2)
# CSS styling
CSS = """
.duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important;}
h3, p, h1 { text-align: center; color: white;}
footer { text-align: center; padding: 10px; width: 100%; background-color: rgba(240, 240, 240, 0.8); z-index: 1000; position: relative; margin-top: 10px; color: black;}
"""
# Footer text
FOOTER_TEXT = """
<footer>
<p>If you enjoyed the functionality of the app, please leave a like!<br>
Check out more on <a href="https://www.linkedin.com/in/your-linkedin/" target="_blank">LinkedIn</a> |
<a href="https://your-portfolio-url.com/" target="_blank">Portfolio</a></p>
</footer>
"""
# Title text
TITLE = "<h1>π RAG Document Q&A π</h1>"
# Gradio interface: two tabs sharing the module-level FAISS store.
with gr.Blocks(css=CSS, theme="Nymbo/Nymbo_Theme") as demo:
    gr.HTML(TITLE)
    # Tab 1: upload and index PDFs, or wipe the knowledge base.
    with gr.Tab("PDF Uploader"):
        pdf_file = gr.File(label="Upload PDF")
        upload_button = gr.Button("Process PDF")
        clear_button = gr.Button("Clear Knowledge Base")  # Resets the vector store
        upload_output = gr.Textbox(label="Upload Status")
    # Tab 2: ask questions against the indexed content.
    with gr.Tab("Q&A System"):
        question_input = gr.Textbox(lines=2, placeholder="Enter your question here...")
        submit_button = gr.Button("Ask Question")
        answer_output = gr.Textbox(label="Answer")
        context_output = gr.Textbox(label="Relevant Context", lines=10)
        confidence_output = gr.Number(label="Confidence Score")
    # Wire buttons to their handlers.
    upload_button.click(process_pdf, inputs=[pdf_file], outputs=[upload_output])
    submit_button.click(process_question, inputs=[question_input], outputs=[answer_output, context_output, confidence_output])
    # Clearing reports its status in the upload tab's textbox.
    clear_button.click(clear_knowledge_base, outputs=[upload_output])
    gr.HTML(FOOTER_TEXT)
# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()