File size: 2,037 Bytes
69afeb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import hashlib
import os
import tempfile

import pinecone
import streamlit as st
from dotenv import load_dotenv
from langchain.document_loaders import PDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone

# Read the .env file, then pick up the three credentials the app relies on.
# os.getenv returns None for any variable that is not set.
load_dotenv()
openai_api_key, pinecone_api_key, pinecone_environment = (
    os.getenv(variable)
    for variable in ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENVIRONMENT")
)

# Configure the Pinecone client once, at import time, with those credentials.
pinecone.init(environment=pinecone_environment, api_key=pinecone_api_key)

# Page heading followed by a one-line description of the app.
st.title("Chat with Your Document")
st.write(
    "Upload a PDF file to chat with its content "
    "using LangChain, Pinecone, and OpenAI."
)

# Upload widget restricted to PDF files; the result is compared against None
# further down before any processing happens.
uploaded_file = st.file_uploader(label="Choose a PDF file", type="pdf")

def _extract_page_texts(pdf_bytes):
    """Return a list with the extracted text of every page of a PDF.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        One string per page, in page order (a page without extractable text
        yields an empty string).

    The loader reads from a filesystem path, whereas an upload only exists in
    memory, so the bytes are written to a temporary file for the duration of
    the parse and the file is removed afterwards.
    """
    # Function-scope import so this block does not depend on the file-level
    # loader import.
    # NOTE(review): the file-level ``PDFLoader`` import does not appear to
    # match any LangChain loader class - confirm and replace it with
    # ``PyPDFLoader``.
    from langchain.document_loaders import PyPDFLoader

    # delete=False + explicit removal lets the loader reopen the file by name
    # on every platform.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
        tmp_file.write(pdf_bytes)
    try:
        pages = PyPDFLoader(tmp_file.name).load()
    finally:
        os.remove(tmp_file.name)

    # LangChain documents carry their text in ``page_content``.
    return [page.page_content for page in pages]


def _index_pages(vector_index_name, embeddings, doc_id, page_texts):
    """Store the non-empty pages of one document in the Pinecone index.

    Args:
        vector_index_name: Name of the Pinecone index (created if missing).
        embeddings: Embeddings object used to vectorise the pages.
        doc_id: Identifier of the document; written to each vector's metadata
            and used as the prefix of each vector id.
        page_texts: Page strings as returned by ``_extract_page_texts``.
    """
    numbered_pages = [
        (number, text) for number, text in enumerate(page_texts) if text.strip()
    ]
    if not numbered_pages:
        return

    if vector_index_name not in pinecone.list_indexes():
        # Size the index from an actual embedding instead of relying on an
        # attribute of the embeddings object or a hard-coded number.
        dimension = len(embeddings.embed_query("dimension probe"))
        pinecone.create_index(vector_index_name, dimension=dimension)

    store = Pinecone.from_existing_index(vector_index_name, embeddings)
    # One vector per page keeps each embedded text small. Ids derived from
    # the document id make a repeated upload overwrite its earlier vectors
    # instead of duplicating them.
    store.add_texts(
        [text for _, text in numbered_pages],
        metadatas=[{"doc_id": doc_id, "page": number} for number, _ in numbered_pages],
        ids=[f"{doc_id}-{number}" for number, _ in numbered_pages],
    )


if uploaded_file is not None:
    pdf_bytes = uploaded_file.getvalue()
    # Content hash identifies the document across script reruns.
    doc_id = hashlib.sha256(pdf_bytes).hexdigest()
    index_name = "pdf-analysis"

    # Initialize OpenAI embeddings and LLM
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    llm = OpenAI(openai_api_key=openai_api_key)

    # Streamlit re-executes the script on every widget interaction, so the
    # parse + upsert is done once per distinct upload and remembered in the
    # session state.
    indexed_pdf = st.session_state.get("indexed_pdf")
    if indexed_pdf is None or indexed_pdf["doc_id"] != doc_id:
        page_texts = _extract_page_texts(pdf_bytes)
        _index_pages(index_name, embeddings, doc_id, page_texts)
        indexed_pdf = {"doc_id": doc_id, "text": "".join(page_texts)}
        st.session_state["indexed_pdf"] = indexed_pdf
    pdf_text = indexed_pdf["text"]

    if not pdf_text.strip():
        st.warning("No text could be extracted from this PDF.")

    # Chat with the document
    user_input = st.text_input("Ask a question about the document:")
    if st.button("Ask"):
        if user_input:
            # Retrieve the pages of *this* document most similar to the
            # question; fall back to the full text when nothing is returned.
            vector_store = Pinecone.from_existing_index(index_name, embeddings)
            matches = vector_store.similarity_search(
                user_input, k=4, filter={"doc_id": doc_id}
            )
            context = "\n\n".join(match.page_content for match in matches) or pdf_text
            prompt = (
                "Analyze the following text and answer the question: "
                f"{context}\n\nQuestion: {user_input}"
            )
            # generate() takes a list of prompts and returns one list of
            # generations per prompt.
            result = llm.generate([prompt])
            st.write(result.generations[0][0].text)
        else:
            st.write("Please enter a question to ask.")

    # Display the PDF text
    st.write("Extracted Text from PDF:")
    st.write(pdf_text)