Spaces:

rehanafzal
/

pdf_document_reader

Sleeping

App Files Files Community

rehanafzal committed on Dec 26, 2024

Commit

7b65368

verified ·

1 Parent(s): d99f851

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -16

app.py CHANGED Viewed

@@ -1,3 +1,100 @@
 import os
 from groq import Groq
 from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -8,8 +105,7 @@ import streamlit as st
 from tempfile import NamedTemporaryFile
 # Initialize Groq client
-client = Groq(api_key=os.getenv("Groq_api_key"))
-# client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 # Function to extract text from a PDF
 def extract_text_from_pdf(pdf_file_path):
@@ -60,11 +156,12 @@ if uploaded_file:
         pdf_path = temp_file.name
     # Extract text, chunk it, and create embeddings
-    text = extract_text_from_pdf(pdf_path)
-    chunks = chunk_text(text)
-    vector_db = create_embeddings_and_store(chunks)
-    # State management for the chat
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
@@ -75,21 +172,16 @@ if uploaded_file:
         st.write("---")
     # Add new query input dynamically
-    if "query_count" not in st.session_state:
-        st.session_state.query_count = 1
-    query_key = f"query_{st.session_state.query_count}"
-    user_query = st.text_input(f"Enter Query {st.session_state.query_count}:", key=query_key)
     if user_query:
         # Generate response
-        response = query_vector_db(user_query, vector_db)
         # Append query and response to the chat history
         st.session_state.chat_history.append({"query": user_query, "response": response})
-        # Increment query count for the next input box
-        st.session_state.query_count += 1
-        # Rerun to show the updated UI
-        st.experimental_rerun()

+# import os
+# from groq import Groq
+# from langchain_community.embeddings import HuggingFaceEmbeddings
+# from langchain_community.vectorstores import FAISS
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+# from PyPDF2 import PdfReader
+# import streamlit as st
+# from tempfile import NamedTemporaryFile
+# # Initialize Groq client
+# client = Groq(api_key=os.getenv("Groq_api_key"))
+# # client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# # Function to extract text from a PDF
+# def extract_text_from_pdf(pdf_file_path):
+#     pdf_reader = PdfReader(pdf_file_path)
+#     text = ""
+#     for page in pdf_reader.pages:
+#         text += page.extract_text()
+#     return text
+# # Function to split text into chunks
+# def chunk_text(text, chunk_size=500, chunk_overlap=50):
+#     text_splitter = RecursiveCharacterTextSplitter(
+#         chunk_size=chunk_size, chunk_overlap=chunk_overlap
+#     )
+#     return text_splitter.split_text(text)
+# # Function to create embeddings and store them in FAISS
+# def create_embeddings_and_store(chunks):
+#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+#     vector_db = FAISS.from_texts(chunks, embedding=embeddings)
+#     return vector_db
+# # Function to query the vector database and interact with Groq
+# def query_vector_db(query, vector_db):
+#     # Retrieve relevant documents
+#     docs = vector_db.similarity_search(query, k=3)
+#     context = "\n".join([doc.page_content for doc in docs])
+#     # Interact with Groq API
+#     chat_completion = client.chat.completions.create(
+#         messages=[
+#             {"role": "system", "content": f"Use the following context:\n{context}"},
+#             {"role": "user", "content": query},
+#         ],
+#         model="llama3-8b-8192",
+#     )
+#     return chat_completion.choices[0].message.content
+# # Streamlit app
+# st.title("Interactive PDF Reader and Chat")
+# # Upload PDF
+# uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
+# if uploaded_file:
+#     with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+#         temp_file.write(uploaded_file.read())
+#         pdf_path = temp_file.name
+#     # Extract text, chunk it, and create embeddings
+#     text = extract_text_from_pdf(pdf_path)
+#     chunks = chunk_text(text)
+#     vector_db = create_embeddings_and_store(chunks)
+#     # State management for the chat
+#     if "chat_history" not in st.session_state:
+#         st.session_state.chat_history = []
+#     # Display chat history
+#     for i, chat in enumerate(st.session_state.chat_history):
+#         st.write(f"**Query {i+1}:** {chat['query']}")
+#         st.write(f"**Response:** {chat['response']}")
+#         st.write("---")
+#     # Add new query input dynamically
+#     if "query_count" not in st.session_state:
+#         st.session_state.query_count = 1
+#     query_key = f"query_{st.session_state.query_count}"
+#     user_query = st.text_input(f"Enter Query {st.session_state.query_count}:", key=query_key)
+#     if user_query:
+#         # Generate response
+#         response = query_vector_db(user_query, vector_db)
+#         # Append query and response to the chat history
+#         st.session_state.chat_history.append({"query": user_query, "response": response})
+#         # Increment query count for the next input box
+#         st.session_state.query_count += 1
+#         # Rerun to show the updated UI
+#         st.experimental_rerun()
 import os
 from groq import Groq
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from tempfile import NamedTemporaryFile
 # Initialize Groq client
+client = Groq(api_key=os.environ.get("Goq_api_key"))
 # Function to extract text from a PDF
 def extract_text_from_pdf(pdf_file_path):
         pdf_path = temp_file.name
     # Extract text, chunk it, and create embeddings
+    if "vector_db" not in st.session_state:
+        text = extract_text_from_pdf(pdf_path)
+        chunks = chunk_text(text)
+        st.session_state.vector_db = create_embeddings_and_store(chunks)
+    # Initialize chat history if not already done
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
         st.write("---")
     # Add new query input dynamically
+    query_key = f"query_{len(st.session_state.chat_history) + 1}"
+    user_query = st.text_input("Enter your query:", key=query_key)
     if user_query:
         # Generate response
+        response = query_vector_db(user_query, st.session_state.vector_db)
         # Append query and response to the chat history
         st.session_state.chat_history.append({"query": user_query, "response": response})
+        # Refresh the app without needing manual rerun
+        st.experimental_set_query_params(rerun="true")