File size: 4,241 Bytes
bf6d94a
 
48c5b47
bf6d94a
 
 
 
 
 
 
 
 
48c5b47
bf6d94a
 
 
 
 
 
169b01a
bf6d94a
 
48c5b47
bf6d94a
 
 
169b01a
bf6d94a
 
 
 
48c5b47
7f3de04
48c5b47
 
 
 
bf6d94a
 
48c5b47
 
bf6d94a
 
 
 
169b01a
bf6d94a
 
48c5b47
bf6d94a
 
 
169b01a
bf6d94a
 
 
 
48c5b47
7f3de04
48c5b47
bf6d94a
48c5b47
 
bf6d94a
 
 
48c5b47
bf6d94a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5a3fc9
 
 
bf6d94a
d5a3fc9
 
 
bf6d94a
 
 
 
 
 
d5a3fc9
 
 
bf6d94a
d5a3fc9
 
 
 
bf6d94a
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import openai
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader

# Function to load and process the PDF document
def load_pdf(file):
    """Load a PDF into LangChain documents using ``PyPDFLoader``.

    Args:
        file: The PDF to load. Either an object that exposes the on-disk
            path through a ``name`` attribute (what the original code
            required) or a plain filesystem path.

    Returns:
        Whatever ``PyPDFLoader(...).load()`` returns for that path.

    Raises:
        ValueError: If ``file`` is ``None``, i.e. nothing was uploaded.
    """
    # Fail with a clear message instead of an AttributeError on `None.name`.
    if file is None:
        raise ValueError("No PDF file was provided.")

    # Fall back to the value itself when there is no `.name`, so plain path
    # strings work as well as upload wrappers.
    pdf_path = getattr(file, "name", file)

    loader = PyPDFLoader(str(pdf_path))
    return loader.load()

# Summarization function using GPT-4
def summarize_pdf(file, openai_api_key):
    """Summarize an uploaded PDF through a RetrievalQA chain backed by gpt-4o.

    Args:
        file: The uploaded PDF, forwarded unchanged to ``load_pdf``.
        openai_api_key: OpenAI API key used for both the embeddings and the
            chat model. Surrounding whitespace is ignored.

    Returns:
        The chain's answer to the fixed summarization prompt, or a short
        user-facing message when the file or API key is missing or the PDF
        produced no documents.
    """
    # Validate before touching the loader or the network so the UI shows an
    # actionable message instead of a stack trace.
    if file is None:
        return "Please upload a PDF document."
    api_key = (openai_api_key or "").strip()
    if not api_key:
        return "Please enter your OpenAI API key."

    # Set the OpenAI API key dynamically.
    # NOTE: this assigns an attribute on the `openai` module, so the value is
    # shared process-wide; the key is also passed explicitly below.
    openai.api_key = api_key

    # Load and process the PDF
    documents = load_pdf(file)
    if not documents:
        # Nothing to index or retrieve from.
        return "No content could be read from the PDF."

    # Create embeddings and index the documents in a FAISS vector store
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    vector_store = FAISS.from_documents(documents, embeddings)

    # Create a RetrievalQA chain for summarization
    # NOTE(review): the answer is built from whatever the retriever returns
    # for the prompt, presumably a subset of pages rather than the whole
    # paper -- confirm this is acceptable for long documents.
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )

    # Query the model for a summary of the document
    return qa_chain.run("Summarize the content of the research paper.")

# Function to handle user queries and provide answers from the document
def query_pdf(file, user_query, openai_api_key):
    """Answer a question about an uploaded PDF through a RetrievalQA chain.

    Args:
        file: The uploaded PDF, forwarded unchanged to ``load_pdf``.
        user_query: The question to ask about the document. Surrounding
            whitespace is ignored.
        openai_api_key: OpenAI API key used for both the embeddings and the
            chat model. Surrounding whitespace is ignored.

    Returns:
        The chain's answer to ``user_query``, or a short user-facing message
        when the file, question or API key is missing or the PDF produced
        no documents.
    """
    # Validate before touching the loader or the network so the UI shows an
    # actionable message instead of a stack trace.
    if file is None:
        return "Please upload a PDF document."
    question = (user_query or "").strip()
    if not question:
        return "Please enter a question."
    api_key = (openai_api_key or "").strip()
    if not api_key:
        return "Please enter your OpenAI API key."

    # Set the OpenAI API key dynamically.
    # NOTE: this assigns an attribute on the `openai` module, so the value is
    # shared process-wide; the key is also passed explicitly below.
    openai.api_key = api_key

    # Load and process the PDF
    documents = load_pdf(file)
    if not documents:
        # Nothing to index or retrieve from.
        return "No content could be read from the PDF."

    # Create embeddings and index the documents in a FAISS vector store
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    vector_store = FAISS.from_documents(documents, embeddings)

    # Create a RetrievalQA chain for querying the document
    llm = ChatOpenAI(model="gpt-4o", openai_api_key=api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )

    # Query the model for the user query
    return qa_chain.run(question)

# Define Gradio interface for the summarization
def create_gradio_interface():
    """Build the Gradio Blocks UI for summarizing and querying a PDF.

    The layout has one shared API-key textbox and two tabs:

    * "Summarize PDF": file upload, a "Summarize" button wired to
      ``summarize_pdf``, a read-only summary box and a clear button.
    * "Ask Questions": file upload, a question textbox whose submit event
      is wired to ``query_pdf``, a read-only answer box and a clear button.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("### ChatPDF and Research Paper Summarizer using GPT-4 and LangChain")

        # Input field for API Key (shared by both tabs)
        with gr.Row():
            openai_api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="Enter your OpenAI API key here")

        with gr.Tab("Summarize PDF"):
            with gr.Row():
                pdf_file = gr.File(label="Upload PDF Document")
                summarize_btn = gr.Button("Summarize")
                summary_output = gr.Textbox(label="Summary", interactive=False)
                clear_btn_summary = gr.Button("Clear Response")

            # Summarize Button Logic
            summarize_btn.click(summarize_pdf, inputs=[pdf_file, openai_api_key_input], outputs=summary_output)

            # Clear Response Button Logic for Summary Tab
            clear_btn_summary.click(lambda: "", inputs=[], outputs=summary_output)

        with gr.Tab("Ask Questions"):
            with gr.Row():
                pdf_file_q = gr.File(label="Upload PDF Document")
                user_input = gr.Textbox(label="Enter your question")
                answer_output = gr.Textbox(label="Answer", interactive=False)
                clear_btn_answer = gr.Button("Clear Response")

            # Submit Question Logic: clear the previous answer first, then run
            # the query. The original registered a second submit listener with
            # no callback (fn=None) to "clear" the box; it had nothing to run,
            # and two independent listeners on one event have no defined
            # order. Chaining with .then() makes the order explicit.
            user_input.submit(
                lambda: "", inputs=[], outputs=answer_output
            ).then(
                query_pdf,
                inputs=[pdf_file_q, user_input, openai_api_key_input],
                outputs=answer_output,
            )

            # Clear Response Button Logic for Answer Tab
            clear_btn_answer.click(lambda: "", inputs=[], outputs=answer_output)

    return demo

# Run Gradio app
# Only when executed as a script (not on import): build the Blocks UI and
# launch it.
if __name__ == "__main__":
    demo = create_gradio_interface()
    # debug=True is forwarded to Gradio's launch(); presumably it keeps the
    # process attached and surfaces errors in the console -- see Gradio docs.
    demo.launch(debug=True)