import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

"""
### How to Use the App:

1. **Upload PDF:** Upload a PDF and click 'Process PDF' to add it to the knowledge base.
2. **Ask a Question:** Switch to the 'Q&A System' tab, enter your question, and click 'Ask Question' to get an answer based on the uploaded PDF content.
3. **Clear Data:** Click 'Clear Knowledge Base' to reset and remove all uploaded documents.

Ensure a PDF is uploaded before asking questions.
"""


# Load environment variables
load_dotenv()

# Load the GROQ API key
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
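# Minimal .env sketch for the lookup above (the value is a placeholder, not a real credential):
#   GROQ_API_KEY=your-groq-api-key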

# Set up the language model
llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=GROQ_API_KEY)

# Define the prompt template
prompt = ChatPromptTemplate.from_template("""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question.
<context>{context}</context>
Question: {input}
""")

# Set up embeddings model
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectors = None  # FAISS vector store; built lazily when the first PDF is processed

# Function to process PDF files
def process_pdf(file):
    global vectors
    if file is not None:
        loader = PyPDFLoader(file.name)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        final_documents = text_splitter.split_documents(docs)
        if vectors is None:
            vectors = FAISS.from_documents(final_documents, embeddings)
        else:
            vectors.add_documents(final_documents)
        return "PDF processed and added to the knowledge base."
    return "No file uploaded."

# Function to clear the knowledge base
def clear_knowledge_base():
    global vectors
    vectors = None  # Reset the vector store
    return "Knowledge base cleared."

def process_question(question):
    global vectors
    if vectors is None:
        return "Please upload a PDF first.", "", 0

    # Retrieve the top-k most relevant chunks along with normalized relevance
    # scores in [0, 1] (higher means more relevant)
    results = vectors.similarity_search_with_relevance_scores(question, k=5)

    if not results:
        return "No relevant context found.", "", 0

    documents = [doc for doc, _ in results]
    scores = [score for _, score in results]

    # Build the context block from the retrieved chunks
    context = "\n\n".join(doc.page_content for doc in documents)

    # Fill the prompt template defined above with the retrieved context and the question
    messages = prompt.invoke({"context": context, "input": question})

    # ChatGroq returns an AIMessage; .content holds the answer text
    response = llm.invoke(messages)

    # Confidence score as the average relevance of the retrieved chunks
    confidence_score = sum(scores) / len(scores)

    return response.content, context, round(confidence_score, 2)
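
# Quick programmatic smoke test of the two functions above (a sketch; "sample.pdf"
# is a hypothetical local file, and SimpleNamespace stands in for Gradio's file object):
#   from types import SimpleNamespace
#   print(process_pdf(SimpleNamespace(name="sample.pdf")))
#   answer, context, score = process_question("What is this document about?")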

# CSS styling
CSS = """
.duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important;}
h3, p, h1 { text-align: center; color: white;}
footer { text-align: center; padding: 10px; width: 100%; background-color: rgba(240, 240, 240, 0.8); z-index: 1000; position: relative; margin-top: 10px; color: black;}
"""

# Footer text
FOOTER_TEXT = """
<footer>
    <p>If you enjoyed the functionality of the app, please leave a like!<br>
    Check out more on <a href="https://www.linkedin.com/in/your-linkedin/" target="_blank">LinkedIn</a> | 
    <a href="https://your-portfolio-url.com/" target="_blank">Portfolio</a></p>
</footer>
"""

# Title text
TITLE = "<h1>πŸ“š RAG Document Q&A πŸ“š</h1>"

# Gradio interface
with gr.Blocks(css=CSS, theme="Nymbo/Nymbo_Theme") as demo:
    gr.HTML(TITLE)

    with gr.Tab("PDF Uploader"):
        pdf_file = gr.File(label="Upload PDF")
        upload_button = gr.Button("Process PDF")
        clear_button = gr.Button("Clear Knowledge Base")  # Button to reset the knowledge base
        upload_output = gr.Textbox(label="Upload Status")

    with gr.Tab("Q&A System"):
        question_input = gr.Textbox(lines=2, placeholder="Enter your question here...")
        submit_button = gr.Button("Ask Question")
        answer_output = gr.Textbox(label="Answer")
        context_output = gr.Textbox(label="Relevant Context", lines=10)
        confidence_output = gr.Number(label="Confidence Score")

    # Button actions
    upload_button.click(process_pdf, inputs=[pdf_file], outputs=[upload_output])
    submit_button.click(process_question, inputs=[question_input], outputs=[answer_output, context_output, confidence_output])
    
    # Action to clear the knowledge base
    clear_button.click(clear_knowledge_base, outputs=[upload_output])

    gr.HTML(FOOTER_TEXT)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
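    # demo.launch(share=True) would additionally expose a temporary public URL,
    # which can be handy when testing the app from another machine (optional).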