# Source: Hugging Face Space "shukdevdatta123/ChatPDF-Gradio" (Space status: Sleeping).
# File size: 3,655 bytes.
# Commit ids listed alongside the code on the source page: bf6d94a, 48c5b47.

import openai
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader

# Function to load and process the PDF document
def load_pdf(file):
    """Load a PDF from disk into LangChain documents via ``PyPDFLoader``.

    Args:
        file: The uploaded PDF. Either a plain filesystem path
            (``str`` / ``os.PathLike``) or an upload wrapper object that
            exposes the on-disk path as its ``name`` attribute.

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns for that path.

    Raises:
        ValueError: If ``file`` is ``None`` or no path can be derived from it.
    """
    import os  # local import: only needed for path normalisation here

    if file is None:
        raise ValueError("No PDF file provided. Please upload a PDF document first.")

    # Plain paths must be checked first: a ``pathlib.Path`` also has a
    # ``name`` attribute, but it holds only the basename, not the full path.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = getattr(file, "name", None)

    if not pdf_path:
        raise ValueError("Could not determine the path of the uploaded PDF file.")

    loader = PyPDFLoader(pdf_path)
    return loader.load()

# Summarization function using GPT-4
def summarize_pdf(file, openai_api_key):
    """Summarize an uploaded PDF with a GPT-4 retrieval QA chain.

    The PDF is loaded with ``load_pdf``, embedded with ``OpenAIEmbeddings``,
    indexed in a FAISS vector store, and queried through a ``RetrievalQA``
    chain (``chain_type="stuff"``) with a fixed summarization prompt.

    Args:
        file: The uploaded PDF, in any form ``load_pdf`` accepts.
        openai_api_key: The caller's OpenAI API key. Surrounding whitespace
            is stripped before use.

    Returns:
        The chain's response to the summarization prompt.

    Raises:
        ValueError: If the API key is missing/blank, no file was uploaded,
            or the PDF produced no documents to index.
    """
    api_key = (openai_api_key or "").strip()
    if not api_key:
        raise ValueError("An OpenAI API key is required.")
    if file is None:
        raise ValueError("Please upload a PDF document to summarize.")

    documents = load_pdf(file)
    if not documents:
        raise ValueError("The uploaded PDF did not yield any content to summarize.")

    # The key is handed to the LangChain wrappers directly instead of being
    # written to the process-wide ``openai.api_key``: the wrappers take their
    # key from this argument (or the environment), and a module global would
    # be shared between concurrent users of the app.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    vector_store = FAISS.from_documents(documents, embeddings)

    llm = ChatOpenAI(model="gpt-4", openai_api_key=api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )

    # NOTE(review): the chain answers from the chunks the retriever returns,
    # so for long PDFs the summary may not reflect the whole document —
    # confirm this is acceptable.
    return qa_chain.run("Summarize the content of the research paper.")

# Function to handle user queries and provide answers from the document
def query_pdf(file, user_query, openai_api_key):
    """Answer a user's question about an uploaded PDF with GPT-4.

    The PDF is loaded with ``load_pdf``, embedded with ``OpenAIEmbeddings``,
    indexed in a FAISS vector store, and the question is run through a
    ``RetrievalQA`` chain (``chain_type="stuff"``).

    Args:
        file: The uploaded PDF, in any form ``load_pdf`` accepts.
        user_query: The question to ask about the document.
        openai_api_key: The caller's OpenAI API key. Surrounding whitespace
            is stripped before use.

    Returns:
        The chain's response to ``user_query``.

    Raises:
        ValueError: If the API key or question is missing/blank, no file was
            uploaded, or the PDF produced no documents to index.
    """
    api_key = (openai_api_key or "").strip()
    if not api_key:
        raise ValueError("An OpenAI API key is required.")
    if file is None:
        raise ValueError("Please upload a PDF document before asking a question.")
    question = (user_query or "").strip()
    if not question:
        raise ValueError("Please enter a question about the document.")

    documents = load_pdf(file)
    if not documents:
        raise ValueError("The uploaded PDF did not yield any content to search.")

    # The key is handed to the LangChain wrappers directly instead of being
    # written to the process-wide ``openai.api_key``: the wrappers take their
    # key from this argument (or the environment), and a module global would
    # be shared between concurrent users of the app.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    vector_store = FAISS.from_documents(documents, embeddings)

    llm = ChatOpenAI(model="gpt-4", openai_api_key=api_key)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )

    return qa_chain.run(question)

# Define Gradio interface for the summarization
def create_gradio_interface():
    """Build the Gradio Blocks UI for summarizing and querying a PDF.

    The layout has one API-key textbox shared by two tabs:

    * "Summarize PDF": a file upload, a button, and a read-only summary box;
      clicking the button calls ``summarize_pdf``.
    * "Ask Questions": a file upload, a question box, and a read-only answer
      box; submitting the question calls ``query_pdf``.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """
    with gr.Blocks() as demo:
        gr.Markdown("### ChatPDF and Research Paper Summarizer using GPT-4 and LangChain")

        # One key field, read by the handlers of both tabs.
        with gr.Row():
            openai_api_key_input = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API key here",
            )

        with gr.Tab("Summarize PDF"):
            with gr.Row():
                pdf_file = gr.File(label="Upload PDF Document")
                summarize_btn = gr.Button("Summarize")
                summary_output = gr.Textbox(label="Summary", interactive=False)

            summarize_btn.click(
                summarize_pdf,
                inputs=[pdf_file, openai_api_key_input],
                outputs=summary_output,
            )

        with gr.Tab("Ask Questions"):
            with gr.Row():
                pdf_file_q = gr.File(label="Upload PDF Document")
                user_input = gr.Textbox(label="Enter your question")
                answer_output = gr.Textbox(label="Answer", interactive=False)

            user_input.submit(
                query_pdf,
                inputs=[pdf_file_q, user_input, openai_api_key_input],
                outputs=answer_output,
            )
            # Clear the previous answer as soon as the question text is
            # edited. This must be a ``change`` listener with a real function:
            # a second ``submit`` listener with ``fn=None`` never cleared
            # anything while typing.
            user_input.change(lambda: "", inputs=None, outputs=answer_output)

    return demo

# Run Gradio app
if __name__ == "__main__":
    # Build the Blocks app and start it only when this file is executed
    # directly; importing the module does not launch anything.
    demo = create_gradio_interface()
    demo.launch(debug=True)