# Hugging Face Space (status: Running)
# File size: 2,037 bytes | revision 69afeb9
import hashlib
import os
import tempfile

import pinecone
import streamlit as st
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone

# NOTE(review): `PDFLoader` does not appear to be exported by
# `langchain.document_loaders` (the PDF loaders are `PyPDFLoader` and friends).
# The original import is kept, but guarded, so a missing name cannot abort
# start-up of the whole app.
try:
    from langchain.document_loaders import PDFLoader
except ImportError:
    PDFLoader = None
# Load the environment variables from the .env file
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pinecone_environment = os.getenv('PINECONE_ENVIRONMENT')

# Fail fast with a readable message: os.getenv() returns None for an unset
# variable, and passing None on to the OpenAI/Pinecone clients would only
# surface later as an opaque authentication or connection error.
missing_settings = [
    name
    for name, value in (
        ('OPENAI_API_KEY', openai_api_key),
        ('PINECONE_API_KEY', pinecone_api_key),
        ('PINECONE_ENVIRONMENT', pinecone_environment),
    )
    if not value
]
if missing_settings:
    st.error(
        "Missing required environment variables: " + ", ".join(missing_settings)
    )
    st.stop()

# Initialize Pinecone
pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)
# Page header: title plus a one-line description of what the app does.
page_title = "Chat with Your Document"
page_intro = (
    "Upload a PDF file to chat with its content using LangChain, Pinecone, and OpenAI."
)
st.title(page_title)
st.write(page_intro)

# Upload widget, restricted to PDF files. The result is compared against
# None further down before any processing happens.
uploaded_file = st.file_uploader(
    "Choose a PDF file",
    type="pdf",
)
def _load_pdf_pages(pdf_bytes):
    """Parse raw PDF bytes into a list of LangChain documents, one per page.

    ``PyPDFLoader`` reads from a filesystem path, whereas Streamlit hands over
    an in-memory upload, so the bytes are spooled to a temporary file that is
    removed again once parsing has finished (even if parsing fails).
    """
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_bytes)
        tmp_path = tmp.name
    try:
        return PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)


def _ensure_index(index_name, embeddings):
    """Create the Pinecone index ``index_name`` unless it already exists."""
    if index_name in pinecone.list_indexes():
        return
    # The embeddings object does not expose its vector size, so measure it by
    # embedding a short probe string. This only happens when the index is new.
    dimension = len(embeddings.embed_query("dimension probe"))
    pinecone.create_index(index_name, dimension=dimension)


if uploaded_file is not None:
    pdf_bytes = uploaded_file.getvalue()
    # The content hash identifies the document: it keys the per-session cache,
    # tags every stored vector, and makes the vector ids deterministic so that
    # re-uploading the same file overwrites its vectors instead of duplicating.
    doc_id = hashlib.sha256(pdf_bytes).hexdigest()
    index_name = "pdf-analysis"

    # Initialize OpenAI embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Streamlit re-runs this whole script on every widget interaction, so the
    # expensive parse/embed/upsert step is done once per uploaded document and
    # its result is kept in the session state.
    if st.session_state.get("indexed_doc_id") != doc_id:
        with st.spinner("Indexing document..."):
            # Pages without extractable text (e.g. scanned images) carry
            # nothing to embed and are skipped.
            pages = [
                page
                for page in _load_pdf_pages(pdf_bytes)
                if page.page_content.strip()
            ]
            if pages:
                for page in pages:
                    page.metadata["doc_id"] = doc_id
                _ensure_index(index_name, embeddings)
                # One vector per page keeps each embedding request small,
                # instead of sending the whole PDF as a single text.
                Pinecone.from_documents(
                    pages,
                    embeddings,
                    index_name=index_name,
                    ids=[f"{doc_id}-{position}" for position, _ in enumerate(pages)],
                )
        # A newline between pages stops the last word of one page from
        # fusing with the first word of the next.
        st.session_state["pdf_text"] = "\n".join(
            page.page_content for page in pages
        )
        st.session_state["indexed_doc_id"] = doc_id

    pdf_text = st.session_state["pdf_text"]

    if not pdf_text:
        st.warning("No extractable text was found in this PDF.")
    else:
        # Initialize OpenAI LLM
        llm = OpenAI(openai_api_key=openai_api_key)

        # Chat with the document
        user_input = st.text_input("Ask a question about the document:")
        if st.button("Ask"):
            if user_input:
                # Retrieve only the pages most relevant to the question. The
                # index is shared by all uploads, so restrict the search to
                # vectors tagged with this document's id.
                vector_store = Pinecone.from_existing_index(index_name, embeddings)
                matches = vector_store.similarity_search(
                    user_input, k=4, filter={"doc_id": doc_id}
                )
                # Freshly upserted vectors may not be searchable immediately;
                # fall back to the full text rather than an empty context.
                context = "\n\n".join(m.page_content for m in matches) or pdf_text
                prompt = f"Analyze the following text and answer the question: {context}\n\nQuestion: {user_input}"
                # generate() takes a list of prompts and returns an LLMResult;
                # the answer text is the first generation for the first prompt.
                result = llm.generate([prompt])
                st.write(result.generations[0][0].text)
            else:
                st.write("Please enter a question to ask.")

        # Display the PDF text
        st.write("Extracted Text from PDF:")
        st.write(pdf_text)