girishwangikar committed on
Commit 321120c · verified · 1 Parent(s): e729802

Update app.py

Files changed (1)
  1. app.py +66 -47
app.py CHANGED
@@ -1,100 +1,119 @@
  import os
  import gradio as gr
- from transformers import pipeline
- from sentence_transformers import SentenceTransformer
- from pypdf import PdfReader
- from sklearn.metrics.pairwise import cosine_similarity
- import numpy as np
  from dotenv import load_dotenv

  load_dotenv()

  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

- # Initialize models
- qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
- embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

- # Global variable to store the document chunks and their embeddings
- document_store = []

- def clear_knowledge_base():
-     global document_store
-     document_store = []
-     return "Knowledge base cleared."

  def process_pdf(file):
-     global document_store
      if file is not None:
-         reader = PdfReader(file.name)
-         text = ""
-         for page in reader.pages:
-             text += page.extract_text() + "\n"
-
-         # Simple text splitting (you might want to implement a more sophisticated method)
-         chunks = [text[i:i+1000] for i in range(0, len(text), 900)]
-
-         document_store = [(chunk, embedding_model.encode(chunk)) for chunk in chunks]
-         return f"PDF processed. {len(chunks)} chunks added to the knowledge base."
      return "No file uploaded."

  def process_question(question):
-     global document_store
-     if not document_store:
          return "Please upload a PDF first.", "", 0
-
-     question_embedding = embedding_model.encode(question)
-
-     # Find the most relevant chunks
-     similarities = [cosine_similarity([question_embedding], [doc_embedding])[0][0] for _, doc_embedding in document_store]
-     top_chunk_indices = np.argsort(similarities)[-3:][::-1]  # Get top 3 most similar chunks
-     context = "\n".join([document_store[i][0] for i in top_chunk_indices])

-     # Use the QA model to get the answer
-     qa_result = qa_model(question=question, context=context)
-     answer = qa_result['answer']
-     confidence_score = qa_result['score']
-     return answer, context, round(confidence_score, 2)

  CSS = """
  .duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important;}
  h3, p, h1 { text-align: center; color: white;}
  footer { text-align: center; padding: 10px; width: 100%; background-color: rgba(240, 240, 240, 0.8); z-index: 1000; position: relative; margin-top: 10px; color: black;}
  """

  FOOTER_TEXT = """
  <footer>
  <p>If you enjoyed the functionality of the app, please leave a like!<br>
- Check out more on <a href="https://www.linkedin.com/in/your-linkedin/" target="_blank">LinkedIn</a> | <a href="https://your-portfolio-url.com/" target="_blank">Portfolio</a></p>
  </footer>
  """

  TITLE = "<h1>📚 RAG Document Q&A 📚</h1>"

  with gr.Blocks(css=CSS, theme="Nymbo/Nymbo_Theme") as demo:
      gr.HTML(TITLE)
-
      with gr.Tab("PDF Uploader"):
          pdf_file = gr.File(label="Upload PDF")
          upload_button = gr.Button("Process PDF")
          upload_output = gr.Textbox(label="Upload Status")
-         clear_button = gr.Button("Clear Knowledge Base")
-         clear_output = gr.Textbox(label="Clear Status")
-
      with gr.Tab("Q&A System"):
          question_input = gr.Textbox(lines=2, placeholder="Enter your question here...")
          submit_button = gr.Button("Ask Question")
          answer_output = gr.Textbox(label="Answer")
          context_output = gr.Textbox(label="Relevant Context", lines=10)
          confidence_output = gr.Number(label="Confidence Score")
-
      upload_button.click(process_pdf, inputs=[pdf_file], outputs=[upload_output])
-     clear_button.click(clear_knowledge_base, outputs=[clear_output])
      submit_button.click(process_question, inputs=[question_input], outputs=[answer_output, context_output, confidence_output])

      gr.HTML(FOOTER_TEXT)

  if __name__ == "__main__":
-     demo.launch()
 
  import os
  import gradio as gr
+ from langchain_groq import ChatGroq
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.chains import create_retrieval_chain
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.embeddings import HuggingFaceEmbeddings
  from dotenv import load_dotenv

+ # Load environment variables
  load_dotenv()
+
+ # Load the GROQ API key
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

+ # Set up the language model
+ llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=GROQ_API_KEY)

+ # Define the prompt template
+ prompt = ChatPromptTemplate.from_template("""
+ Answer the questions based on the provided context only.
+ Please provide the most accurate response based on the question.
+ <context>{context}</context>
+ Question: {input}
+ """)

+ # Set up embeddings model
+ embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+ vectors = None

+ # Function to process PDF files
  def process_pdf(file):
+     global vectors
      if file is not None:
+         loader = PyPDFLoader(file.name)
+         docs = loader.load()
+         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+         final_documents = text_splitter.split_documents(docs)
+         if vectors is None:
+             vectors = FAISS.from_documents(final_documents, embeddings)
+         else:
+             vectors.add_documents(final_documents)
+         return "PDF processed and added to the knowledge base."
      return "No file uploaded."

+ # Function to clear the knowledge base
+ def clear_knowledge_base():
+     global vectors
+     vectors = None
+     return "Knowledge base cleared."
+
+ # Function to process questions
  def process_question(question):
+     if vectors is None:
          return "Please upload a PDF first.", "", 0

+     document_chain = create_stuff_documents_chain(llm, prompt)
+     retriever = vectors.as_retriever()
+     retrieval_chain = create_retrieval_chain(retriever, document_chain)

+     response = retrieval_chain.invoke({'input': question})
+     context = "\n\n".join([doc.page_content for doc in response["context"]])

+     # Calculate a confidence score based on the relevance of retrieved documents
+     confidence_score = sum([doc.metadata.get('score', 0) for doc in response["context"]]) / len(response["context"])

+     return response['answer'], context, round(confidence_score, 2)

+ # CSS styling
  CSS = """
  .duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important;}
  h3, p, h1 { text-align: center; color: white;}
  footer { text-align: center; padding: 10px; width: 100%; background-color: rgba(240, 240, 240, 0.8); z-index: 1000; position: relative; margin-top: 10px; color: black;}
  """

+ # Footer text
  FOOTER_TEXT = """
  <footer>
  <p>If you enjoyed the functionality of the app, please leave a like!<br>
+ Check out more on <a href="https://www.linkedin.com/in/your-linkedin/" target="_blank">LinkedIn</a> |
+ <a href="https://your-portfolio-url.com/" target="_blank">Portfolio</a></p>
  </footer>
  """

+ # Title text
  TITLE = "<h1>📚 RAG Document Q&A 📚</h1>"

+ # Gradio interface
  with gr.Blocks(css=CSS, theme="Nymbo/Nymbo_Theme") as demo:
      gr.HTML(TITLE)
+
      with gr.Tab("PDF Uploader"):
          pdf_file = gr.File(label="Upload PDF")
          upload_button = gr.Button("Process PDF")
+         clear_button = gr.Button("Clear Knowledge Base")
          upload_output = gr.Textbox(label="Upload Status")
+
      with gr.Tab("Q&A System"):
          question_input = gr.Textbox(lines=2, placeholder="Enter your question here...")
          submit_button = gr.Button("Ask Question")
          answer_output = gr.Textbox(label="Answer")
          context_output = gr.Textbox(label="Relevant Context", lines=10)
          confidence_output = gr.Number(label="Confidence Score")
+
+     # Button actions
      upload_button.click(process_pdf, inputs=[pdf_file], outputs=[upload_output])
      submit_button.click(process_question, inputs=[question_input], outputs=[answer_output, context_output, confidence_output])

+     # Action to clear the knowledge base
+     clear_button.click(clear_knowledge_base, outputs=[upload_output])
+
      gr.HTML(FOOTER_TEXT)

+ # Launch the Gradio app
  if __name__ == "__main__":
+     demo.launch()
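
A quick way to exercise the updated handlers without launching the Gradio UI is to import the module and call them directly. The sketch below is only an illustration, not part of the commit: it assumes GROQ_API_KEY is set in the environment and the dependencies from the new imports (langchain, langchain-groq, faiss, pypdf) are installed; the PDF path "sample.pdf" and the sample question are hypothetical.

# Minimal smoke test for the updated app.py (hypothetical file path and question).
# Assumes GROQ_API_KEY is set and the new langchain/FAISS dependencies are installed.
from types import SimpleNamespace

import app  # importing runs the module-level setup (llm, prompt, embeddings) but not demo.launch()

# process_pdf reads file.name, matching the object Gradio's gr.File passes in
status = app.process_pdf(SimpleNamespace(name="sample.pdf"))
print(status)

# process_question returns (answer, retrieved context, confidence score)
answer, context, confidence = app.process_question("What is the document about?")
print(answer, confidence)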