Ahmadkhan12 committed on
Commit
ba5f07e
·
verified ·
1 Parent(s): b4717d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -55
app.py CHANGED
@@ -1,60 +1,46 @@
 
1
  import os
2
  import streamlit as st
3
- from groq import Groq
4
- from langchain.chains import RetrievalQA
5
- from langchain.vectorstores import FAISS
6
  from langchain.document_loaders import PyPDFLoader
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from io import BytesIO
9
-
10
# Groq API key, read from the environment so the secret is never stored in
# source control. Export GROQ_API_KEY before launching the app; when the
# variable is missing this falls back to an empty string.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
12
-
13
- # Define a custom embedding class for Groq
14
class GroqEmbedding:
    """Embedding adapter that forwards embedding requests to a Groq client.

    Exposes ``embed_documents`` and ``embed_query``, both of which delegate
    to same-named methods on the wrapped client.
    """

    def __init__(self, model="groq-embedding-model"):
        """Create the Groq client and remember which model to request.

        Args:
            model: Name of the embedding model passed on every client call.
        """
        self.client = Groq(api_key=GROQ_API_KEY)
        self.model = model

    def embed_documents(self, texts):
        """Return whatever the client produces for a batch of document texts."""
        # NOTE(review): assumes the Groq client exposes embed_documents();
        # confirm against the installed SDK.
        return self.client.embed_documents(texts, model=self.model)

    def embed_query(self, query):
        """Return whatever the client produces for a single query string."""
        # NOTE(review): assumes the Groq client exposes embed_query();
        # confirm against the installed SDK.
        query_embedding = self.client.embed_query(query, model=self.model)
        return query_embedding
27
-
28
# Streamlit page: upload a PDF, index its chunks, then answer questions on it.
st.title("PDF Question-Answering with Groq Embeddings")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

# Nothing below runs until a file has actually been uploaded.
if uploaded_file is not None:
    # Copy the uploaded bytes into an in-memory buffer.
    pdf_file = BytesIO(uploaded_file.read())

    # NOTE(review): the loader is handed an in-memory buffer here; it
    # presumably expects a filesystem path -- confirm.
    loader = PyPDFLoader(pdf_file)
    documents = loader.load()

    # Break the pages into overlapping chunks before embedding them.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    split_docs = text_splitter.split_documents(documents)

    # Embedding backend used to vectorise the chunks.
    embeddings = GroqEmbedding(model="groq-embedding-model")

    # Vector index built from the embedded chunks.
    vector_db = FAISS.from_documents(split_docs, embeddings)

    # NOTE(review): the chain is built with llm=None and a `vectorstore`
    # keyword; verify that RetrievalQA.from_chain_type accepts both.
    qa = RetrievalQA.from_chain_type(
        llm=None,
        chain_type="stuff",
        vectorstore=vector_db,
    )

    # Free-text question from the user; empty until something is typed.
    query = st.text_input("Ask a question about the PDF:")

    if query:
        result = qa.run(query)
        st.write("Answer:", result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
  import os
3
  import streamlit as st
 
 
 
4
  from langchain.document_loaders import PyPDFLoader
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.embeddings import Embedding
7
+ from langchain_community.embeddings.groq import GroqEmbedding
8
+
9
+ # Function to process PDF
10
def process_pdf(file):
    """Write an uploaded PDF to a temporary file and return the file's path.

    Args:
        file: Binary file-like object exposing ``read()``.

    Returns:
        Path of a newly created ``.pdf`` temporary file holding the full
        contents of ``file``. The file is created with ``delete=False``, so
        the caller is responsible for removing it when finished.
    """
    # Rewind first: a stream that was already read would otherwise yield an
    # empty or truncated PDF. Streams that cannot seek are read as-is.
    seekable = getattr(file, "seekable", None)
    if callable(seekable) and seekable():
        file.seek(0)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(file.read())
        tmpfile_path = tmpfile.name
    return tmpfile_path
16
+
17
+ # Main function to run the app
18
def main():
    """Run the Streamlit app: upload a PDF, index it, and search it.

    Flow:
        1. Show the title and a PDF uploader.
        2. Read the Groq API key from the ``GROQ_API_KEY`` environment
           variable and stop with an error message if it is missing.
        3. Copy the upload to a temporary file, load it with ``PyPDFLoader``,
           and delete the temporary file again.
        4. Build a FAISS index from the loaded documents.
        5. Show the text of the top five matches for the user's query.
    """
    st.title("PDF Embedding and Query System")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    # The key comes from the environment so that no secret is committed with
    # the source code.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        st.error("GROQ_API_KEY is not set. Export it before running the app.")
        return

    tmp_file_path = process_pdf(uploaded_file)
    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # process_pdf creates the file with delete=False, so it has to be
        # removed here or every upload leaves a file behind.
        os.remove(tmp_file_path)

    if not documents:
        # Building an index from an empty document list would fail.
        st.warning("No readable pages were found in the uploaded PDF.")
        return

    # NOTE(review): the GroqEmbedding(api_key=...) signature is carried over
    # from the previous code -- confirm it against the installed package.
    embeddings = GroqEmbedding(api_key=api_key)
    vector_db = FAISS.from_documents(documents, embeddings)

    query = st.text_input("Enter a query to search:")
    if query:
        for result in vector_db.similarity_search(query, k=5):
            # Search hits are Document objects; their text is exposed as
            # `page_content`, not through subscripting.
            st.write(result.page_content)
43
+
44
+ # Run the app
45
# Start the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()