Spaces:

Ahmadkhan12
/

Rag-university-act-2016

Sleeping

App Files Community

Ahmadkhan12 committed on Nov 24, 2024

Commit

ba5f07e

verified ·

1 Parent(s): b4717d0

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -55

app.py CHANGED Viewed

@@ -1,60 +1,46 @@
 import os
 import streamlit as st
-from groq import Groq
-from langchain.chains import RetrievalQA
-from langchain.vectorstores import FAISS
 from langchain.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from io import BytesIO
-# Set up Groq API key
-GROQ_API_KEY = "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"
-# Define a custom embedding class for Groq
-class GroqEmbedding:
-    def __init__(self, model="groq-embedding-model"):
-        self.model = model
-        self.client = Groq(api_key=GROQ_API_KEY)
-    def embed_documents(self, texts):
-        # Use Groq's API to generate embeddings for documents
-        embeddings = self.client.embed_documents(texts, model=self.model)
-        return embeddings
-    def embed_query(self, query):
-        # Use Groq's API to generate embedding for a query
-        return self.client.embed_query(query, model=self.model)
-# Streamlit App UI
-st.title("PDF Question-Answering with Groq Embeddings")
-uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
-# Process the uploaded PDF
-if uploaded_file is not None:
-    # Convert the uploaded file to a BytesIO object to read it in-memory
-    pdf_file = BytesIO(uploaded_file.read())
-    # Load the PDF file with PyPDFLoader
-    loader = PyPDFLoader(pdf_file)
-    documents = loader.load()
-    # Split documents into smaller chunks for better processing
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    split_docs = text_splitter.split_documents(documents)
-    # Create embeddings using Groq
-    embeddings = GroqEmbedding(model="groq-embedding-model")  # Use your preferred Groq model
-    # Create a FAISS vector store with the embeddings
-    vector_db = FAISS.from_documents(split_docs, embeddings)
-    # Initialize the retrieval-based QA system
-    qa = RetrievalQA.from_chain_type(llm=None, chain_type="stuff", vectorstore=vector_db)
-    # User input for querying the PDF content
-    query = st.text_input("Ask a question about the PDF:")
-    if query:
-        result = qa.run(query)
-        st.write("Answer:", result)

+import tempfile
 import os
 import streamlit as st
 from langchain.document_loaders import PyPDFLoader
+from langchain.vectorstores import FAISS
+from langchain.embeddings import Embedding
+from langchain_community.embeddings.groq import GroqEmbedding
+# Function to process PDF
+def process_pdf(file):
+    # Save the uploaded file into a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
+        tmpfile.write(file.read())  # Write the uploaded file's content
+        tmpfile_path = tmpfile.name  # Get the file path
+    return tmpfile_path
+# Main function to run the app
+def main():
+    st.title("PDF Embedding and Query System")
+    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+    if uploaded_file is not None:
+        # Process the uploaded PDF file
+        tmp_file_path = process_pdf(uploaded_file)
+        # Load the PDF content
+        loader = PyPDFLoader(tmp_file_path)
+        documents = loader.load()
+        # Use Groq embeddings (assuming Groq API key is set correctly)
+        embeddings = GroqEmbedding(api_key="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")
+        # Create a vector database
+        vector_db = FAISS.from_documents(documents, embeddings)
+        # Perform search or other actions
+        query = st.text_input("Enter a query to search:")
+        if query:
+            results = vector_db.similarity_search(query, k=5)
+            for result in results:
+                st.write(result["text"])
+# Run the app
+if __name__ == "__main__":
+    main()