# Web-viewer residue captured with this file (not program code):
# user "wholewhale", commit "split summary" (8d1c8be), size 3.27 kB.
import os
import gradio as gr
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import ChatAnthropic
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders import TextLoader
# Set API keys from environment variables.
# os.getenv returns None when the variable is unset and os.environ only accepts
# strings, so the value is copied back only when it is actually present;
# otherwise importing this module would fail with a TypeError.
_anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
if _anthropic_api_key is not None:
    os.environ['ANTHROPIC_API_KEY'] = _anthropic_api_key

# Text of the most recently loaded PDF; written by load_pdf, read by the chat code.
pdf_content = ""
def load_pdf(pdf_doc):
    """Load an uploaded PDF and cache its text in the module-level ``pdf_content``.

    Args:
        pdf_doc: The uploaded file (an object exposing its path as ``name``,
            or a plain path string), or ``None`` when nothing was uploaded.

    Returns:
        A status message: "PDF Loaded Successfully.", "No PDF uploaded.", or
        "Error processing PDF: ..." with the underlying error text.
    """
    global pdf_content
    if pdf_doc is None:
        return "No PDF uploaded."
    try:
        # Accept an upload object as well as a bare path string.
        pdf_path = getattr(pdf_doc, "name", pdf_doc)
        documents = OnlinePDFLoader(pdf_path).load()
        # The loader returns Document objects, not strings, so join their text;
        # joining the objects directly raises TypeError and the PDF never loads.
        pdf_content = ' '.join(doc.page_content for doc in documents)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing PDF: {e}"
    return "PDF Loaded Successfully."
def chat_with_pdf(question):
    """Summarize the loaded PDF and answer a question about it.

    The PDF text is read from the module-level ``pdf_content``. The summary
    and the answer are requested from the same chat model in separate calls,
    so each section of the returned text maps to one prompt.

    Args:
        question: The user's question about the PDF. When empty or blank,
            only the summary is produced.

    Returns:
        A "Summary: ..." section, followed by a "Chat Response: ..." section
        when a question was given; or "No PDF uploaded." when no PDF text
        has been loaded yet.
    """
    if not pdf_content:
        return "No PDF uploaded."

    # Create an instance of the ChatAnthropic model
    model = ChatAnthropic()

    # The document and the question are passed as template *variables*.
    # Embedding them directly in the template makes any "{" or "}" in the
    # PDF text be parsed as a placeholder and break prompt formatting.
    summary_prompt = ChatPromptTemplate.from_messages([
        ("human", "{pdf_content}"),
        ("human", "Give a clear summary of this pdf information at a 8th grade reading level."),
    ])
    summary = (summary_prompt | model).invoke({"pdf_content": pdf_content}).content

    if not question or not question.strip():
        # Nothing to answer; avoid sending an empty message to the model.
        return f"Summary: {summary}"

    answer_prompt = ChatPromptTemplate.from_messages([
        ("human", "{pdf_content}"),
        ("human", "{question}"),
    ])
    response = (answer_prompt | model).invoke(
        {"pdf_content": pdf_content, "question": question}
    )

    # Combine the chat response and the summary
    return f"Summary: {summary}\n\nChat Response: {response.content}"
# Define Gradio UI handler
def gradio_interface(pdf_doc, question):
    """Handle one UI request: load the PDF if needed, then chat about it.

    This is the callback for a text-output interface, so it always returns
    a string (a load status message or the chat result), never a UI object.

    Args:
        pdf_doc: The uploaded PDF as provided by the file input, or ``None``.
        question: The question typed by the user; may be empty.

    Returns:
        The status message from ``load_pdf`` when the PDF could not be
        loaded or no question was asked yet; otherwise the text returned by
        ``chat_with_pdf``.
    """
    if not pdf_content:
        status = load_pdf(pdf_doc)
        # load_pdf reports failures through its return string and leaves
        # pdf_content empty, so re-check the global rather than the message.
        if not pdf_content or not question:
            return status
    # NOTE(review): once a PDF is cached, a newly uploaded file is not
    # reloaded here; confirm whether that is intended.
    return chat_with_pdf(question)
# Build the UI: gradio_interface receives the uploaded file and the question
# and returns the text shown in the single output box.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Load a pdf", file_types=['.pdf'], type="file"),
        gr.Textbox(label="Ask a question about the PDF"),
    ],
    outputs="text",
    # live=True re-runs the handler whenever an input changes.
    live=True,
    title="Chat with PDF content using Anthropic",
    description="Upload a .PDF and interactively chat about its content.",
)

# Start the web server.
demo.launch()