Waseem771 committed
Commit f52dcd5 · verified · 1 Parent(s): a4b567b

Update app.py

Files changed (1)
  1. app.py +17 -20
app.py CHANGED
@@ -1,9 +1,7 @@
 import os
 import streamlit as st
-from langchain.document_loaders import PDFLoader
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Pinecone
-from langchain.llms import OpenAI
+import fitz  # PyMuPDF
+import openai
 from dotenv import load_dotenv
 import pinecone
 
@@ -18,42 +16,41 @@ pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)
 
 # Streamlit app
 st.title("Chat with Your Document")
-st.write("Upload a PDF file to chat with its content using LangChain, Pinecone, and OpenAI.")
+st.write("Upload a PDF file to chat with its content using Pinecone and OpenAI.")
 
 # File upload
 uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 
 if uploaded_file is not None:
     # Load the PDF file
-    pdf_loader = PDFLoader(file_path=uploaded_file)
-    documents = pdf_loader.load()
-
-    # Extract text from the PDF
+    pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf")
     pdf_text = ""
-    for doc in documents:
-        pdf_text += doc.text
+    for page_num in range(pdf_document.page_count):
+        page = pdf_document.load_page(page_num)
+        pdf_text += page.get_text()
 
     # Initialize OpenAI embeddings
-    embeddings = OpenAIEmbeddings(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     # Create a Pinecone vector store
    index_name = "pdf-analysis"
     if index_name not in pinecone.list_indexes():
-        pinecone.create_index(index_name, dimension=embeddings.dimension)
-    vector_store = Pinecone(index_name=index_name, embeddings=embeddings)
+        pinecone.create_index(index_name, dimension=1536)  # matches text-embedding-ada-002 vectors
+    vector_store = pinecone.Index(index_name)
 
     # Add the PDF text to the vector store
-    vector_store.add_texts([pdf_text])
-
-    # Initialize OpenAI LLM
-    llm = OpenAI(api_key=openai_api_key)
+    vector_store.upsert([("0", openai.Embedding.create(model="text-embedding-ada-002", input=pdf_text)["data"][0]["embedding"])])
 
     # Chat with the document
     user_input = st.text_input("Ask a question about the document:")
     if st.button("Ask"):
         if user_input:
-            response = llm.generate(prompt=f"Analyze the following text and answer the question: {pdf_text}\n\nQuestion: {user_input}")
-            st.write(response)
+            response = openai.Completion.create(
+                engine="davinci",
+                prompt=f"Analyze the following text and answer the question: {pdf_text}\n\nQuestion: {user_input}",
+                max_tokens=150
+            )
+            st.write(response.choices[0].text.strip())
         else:
             st.write("Please enter a question to ask.")
 
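
A possible follow-up (not part of this commit): the updated app.py embeds the whole PDF as one vector and still pastes the full pdf_text into every completion prompt, so the Pinecone index never actually drives retrieval and long documents will exceed the model's context limit. Below is a minimal chunk-and-query sketch, assuming the same legacy SDKs as app.py (openai<1.0, pinecone-client v2), a 1536-dimension text-embedding-ada-002 index, and an illustrative CHUNK_SIZE; exact query/response field names may vary by client version.

import openai
import pinecone

CHUNK_SIZE = 1000  # characters per chunk; illustrative value, not from the commit

def index_document(index, pdf_text):
    # Split the document into fixed-size chunks, embed each one, and upsert it
    # together with the chunk text as metadata so it can be recovered at query time.
    chunks = [pdf_text[i:i + CHUNK_SIZE] for i in range(0, len(pdf_text), CHUNK_SIZE)]
    vectors = []
    for i, chunk in enumerate(chunks):
        emb = openai.Embedding.create(model="text-embedding-ada-002", input=chunk)["data"][0]["embedding"]
        vectors.append((str(i), emb, {"text": chunk}))
    index.upsert(vectors)

def answer_question(index, question, top_k=3):
    # Embed the question, pull the closest chunks from Pinecone, and prompt the
    # completion model with only that retrieved context instead of the whole PDF.
    q_emb = openai.Embedding.create(model="text-embedding-ada-002", input=question)["data"][0]["embedding"]
    result = index.query(vector=q_emb, top_k=top_k, include_metadata=True)
    context = "\n\n".join(match["metadata"]["text"] for match in result["matches"])
    response = openai.Completion.create(
        engine="davinci",
        prompt=f"Answer the question using only the context below.\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:",
        max_tokens=150,
    )
    return response.choices[0].text.strip()

In app.py this would replace the single upsert and the Completion call: roughly index_document(vector_store, pdf_text) after text extraction, and st.write(answer_question(vector_store, user_input)) inside the "Ask" button handler.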