File size: 2,692 Bytes
e3395e5
020df6e
3cc9e3d
020df6e
 
 
 
 
 
 
 
 
e3395e5
3cc9e3d
 
020df6e
 
 
 
 
 
 
 
 
 
 
e3395e5
 
020df6e
e3395e5
020df6e
 
 
 
e3395e5
020df6e
 
 
e3395e5
020df6e
 
 
e3395e5
020df6e
 
e3395e5
020df6e
 
 
 
 
 
 
e3395e5
020df6e
 
 
 
 
 
 
 
 
e3395e5
020df6e
1315fe8
020df6e
 
 
 
 
 
 
 
773e139
020df6e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import chainlit as cl
import os 
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate

# The OpenAI credential is read from the environment (None when it is unset).
open_ai_key = os.environ.get("OPENAI_API_KEY")

# Chat model instance reused by every question handled in this module.
llm = ChatOpenAI(api_key=open_ai_key)

# Prompt text for the answering step. {context} and {question} are the two
# placeholders declared in input_variables below.
template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

prompt = PromptTemplate(input_variables=["context", "question"], template=template)

_EMBEDDING_MODEL_NAME = "embaas/sentence-transformers-multilingual-e5-base"
_embeddings_cache = None


def _get_embeddings():
    """Return the shared embedding model, loading it on first use only."""
    global _embeddings_cache
    if _embeddings_cache is None:
        # Loading the model is slow, so it is done once per process instead
        # of once per question.
        _embeddings_cache = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
    return _embeddings_cache


def _resolve_pdf_path(pdf_file):
    """Return the filesystem path for an upload, or None if there is none."""
    if pdf_file is None:
        return None
    if isinstance(pdf_file, (str, os.PathLike)):
        # The File widget is declared with type="filepath", so a plain path
        # is a valid input here.
        return os.fspath(pdf_file)
    # File-like upload objects expose their location through ``.name``.
    return getattr(pdf_file, "name", None)


def process_pdf_and_ask_question(pdf_file, question):
    """Answer ``question`` using the text of the uploaded PDF.

    The PDF is loaded, split into overlapping chunks, embedded into a
    Chroma collection created for this call, and queried through a
    ConversationalRetrievalChain built on the module-level ``llm`` and
    ``prompt``.

    Args:
        pdf_file: Path to the PDF (``str`` / ``os.PathLike``) or an upload
            object whose ``.name`` attribute holds the path. May be None
            when nothing was uploaded.
        question: The user's question.

    Returns:
        The chain's ``"answer"`` string, or a short explanatory message when
        the PDF is missing, the question is blank, or the PDF yields no
        text chunks.
    """
    import uuid  # only needed to name the per-call collection

    pdf_path = _resolve_pdf_path(pdf_file)
    if not pdf_path:
        return "Please upload a PDF file first."
    if not question or not question.strip():
        return "Please enter a question."

    # Load the PDF and split it into overlapping chunks.
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_documents(pages)
    if not docs:
        return "No readable text could be extracted from this PDF."

    # Use a collection name unique to this call. Without an explicit name
    # every call lands in Chroma's default collection; NOTE(review): the
    # in-process client may keep that collection alive between requests, so
    # chunks from earlier uploads could leak into later answers.
    db = Chroma.from_documents(
        docs,
        _get_embeddings(),
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )
    try:
        # The memory object is created fresh on every call, so no chat
        # history is carried over from one question to the next.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=ChatMessageHistory(),
            return_messages=True,
        )
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(),
            memory=memory,
            return_source_documents=False,
            combine_docs_chain_kwargs={"prompt": prompt},
        )
        # ``invoke`` replaces the deprecated ``chain(...)`` call style.
        result = chain.invoke({"question": question})
        return result["answer"]
    finally:
        # Drop the per-call collection so the vectors do not pile up.
        db.delete_collection()

def gradio_interface(pdf, question):
    """Gradio callback: answer ``question`` about the uploaded ``pdf``.

    Hands both arguments to ``process_pdf_and_ask_question`` and returns
    its result unchanged.
    """
    answer = process_pdf_and_ask_question(pdf_file=pdf, question=question)
    return answer

# Build the web UI (one PDF upload plus a question box, plain-text output)
# and start serving it.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(file_count="single", type="filepath"),
        gr.Textbox(lines=2, placeholder="Ask a question..."),
    ],
    outputs="text",
    title="PDF Q&A",
    description="Upload a PDF and ask questions about it.",
)
demo.launch()