Waseem771 committed
Commit f52dcd5 · verified · 1 Parent(s): a4b567b

Update app.py

Files changed (1)
  1. app.py +17 -20
app.py CHANGED
@@ -1,9 +1,7 @@
 import os
 import streamlit as st
-from langchain.document_loaders import PDFLoader
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Pinecone
-from langchain.llms import OpenAI
+import fitz  # PyMuPDF
+import openai
 from dotenv import load_dotenv
 import pinecone
 
@@ -18,42 +16,41 @@ pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)
 
 # Streamlit app
 st.title("Chat with Your Document")
-st.write("Upload a PDF file to chat with its content using LangChain, Pinecone, and OpenAI.")
+st.write("Upload a PDF file to chat with its content using Pinecone and OpenAI.")
 
 # File upload
 uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 
 if uploaded_file is not None:
     # Load the PDF file
-    pdf_loader = PDFLoader(file_path=uploaded_file)
-    documents = pdf_loader.load()
-
-    # Extract text from the PDF
+    pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf")
     pdf_text = ""
-    for doc in documents:
-        pdf_text += doc.text
+    for page_num in range(pdf_document.page_count):
+        page = pdf_document.load_page(page_num)
+        pdf_text += page.get_text()
 
     # Initialize OpenAI embeddings
-    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     # Create a Pinecone vector store
    index_name = "pdf-analysis"
     if index_name not in pinecone.list_indexes():
-        pinecone.create_index(index_name, dimension=embeddings.dimension)
-    vector_store = Pinecone(index_name=index_name, embeddings=embeddings)
+        pinecone.create_index(index_name, dimension=1536)  # matches text-embedding-ada-002 vectors
+    vector_store = pinecone.Index(index_name)
 
     # Add the PDF text to the vector store
-    vector_store.add_texts([pdf_text])
-
-    # Initialize OpenAI LLM
-    llm = OpenAI(api_key=openai_api_key)
+    vector_store.upsert([("0", openai.Embedding.create(model="text-embedding-ada-002", input=pdf_text)["data"][0]["embedding"])])
 
     # Chat with the document
     user_input = st.text_input("Ask a question about the document:")
     if st.button("Ask"):
         if user_input:
-            response = llm.generate(prompt=f"Analyze the following text and answer the question: {pdf_text}\n\nQuestion: {user_input}")
-            st.write(response)
+            response = openai.Completion.create(
+                engine="davinci",
+                prompt=f"Analyze the following text and answer the question: {pdf_text}\n\nQuestion: {user_input}",
+                max_tokens=150
+            )
+            st.write(response.choices[0].text.strip())
         else:
             st.write("Please enter a question to ask.")
 
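
A possible follow-up (not part of this commit): the updated app.py embeds the whole PDF as one vector and still pastes the full pdf_text into every completion prompt, so the Pinecone index never actually drives retrieval and long documents will exceed the model's context limit. Below is a minimal chunk-and-query sketch, assuming the same legacy SDKs as app.py (openai<1.0, pinecone-client v2), a 1536-dimension text-embedding-ada-002 index, and an illustrative CHUNK_SIZE; exact query/response field names may vary by client version.

import openai
import pinecone

CHUNK_SIZE = 1000  # characters per chunk; illustrative value, not from the commit

def index_document(index, pdf_text):
    # Split the document into fixed-size chunks, embed each one, and upsert it
    # together with the chunk text as metadata so it can be recovered at query time.
    chunks = [pdf_text[i:i + CHUNK_SIZE] for i in range(0, len(pdf_text), CHUNK_SIZE)]
    vectors = []
    for i, chunk in enumerate(chunks):
        emb = openai.Embedding.create(model="text-embedding-ada-002", input=chunk)["data"][0]["embedding"]
        vectors.append((str(i), emb, {"text": chunk}))
    index.upsert(vectors)

def answer_question(index, question, top_k=3):
    # Embed the question, pull the closest chunks from Pinecone, and prompt the
    # completion model with only that retrieved context instead of the whole PDF.
    q_emb = openai.Embedding.create(model="text-embedding-ada-002", input=question)["data"][0]["embedding"]
    result = index.query(vector=q_emb, top_k=top_k, include_metadata=True)
    context = "\n\n".join(match["metadata"]["text"] for match in result["matches"])
    response = openai.Completion.create(
        engine="davinci",
        prompt=f"Answer the question using only the context below.\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:",
        max_tokens=150,
    )
    return response.choices[0].text.strip()

In app.py this would replace the single upsert and the Completion call: roughly index_document(vector_store, pdf_text) after text extraction, and st.write(answer_question(vector_store, user_input)) inside the "Ask" button handler.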