|
import os |
|
import gradio as gr |
|
from langchain.document_loaders import OnlinePDFLoader |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.chat_models import ChatAnthropic |
|
from langchain.prompts import ChatPromptTemplate |
|
from langchain.document_loaders import TextLoader |
|
|
|
|
|
# ChatAnthropic() is constructed without an explicit key further down, so it
# relies on ANTHROPIC_API_KEY already being present in the environment.
# Copying the variable onto itself adds nothing when it is set and raises
# TypeError when it is unset (os.environ only accepts str values), so just
# warn at start-up to make a missing key easy to diagnose.
if not os.getenv("ANTHROPIC_API_KEY"):
    print(
        "Warning: ANTHROPIC_API_KEY is not set; "
        "requests to Anthropic will fail until it is provided."
    )

# Plain text of the most recently loaded PDF. Written by load_pdf() and read
# by the chat functions; an empty string means "nothing loaded yet".
pdf_content = ""
|
|
|
def load_pdf(pdf_doc):
    """Load an uploaded PDF and store its text in the global ``pdf_content``.

    Args:
        pdf_doc: Uploaded file object whose ``.name`` attribute is the
            location of the PDF, or ``None`` when nothing was uploaded.

    Returns:
        A status string for display: ``"PDF Loaded Successfully."``,
        ``"No PDF uploaded."``, or ``"Error processing PDF: ..."``.
        Failures are reported through the return value, never raised, and
        leave the previously stored ``pdf_content`` untouched.
    """
    global pdf_content

    if pdf_doc is None:
        return "No PDF uploaded."

    try:
        documents = OnlinePDFLoader(pdf_doc.name).load()
        # load() returns Document objects, not strings; str.join() on them
        # raises TypeError, so pull the text out of each document first.
        text = " ".join(doc.page_content for doc in documents)
    except Exception as e:  # UI boundary: show any loader failure as text
        return f"Error processing PDF: {e}"

    # An image-only or empty PDF yields no text; callers treat an empty
    # pdf_content as "not loaded", so report it instead of claiming success.
    if not text.strip():
        return "Error processing PDF: no extractable text found."

    pdf_content = text
    return "PDF Loaded Successfully."
|
|
|
def chat_with_pdf(question):
    """Summarise the loaded PDF and answer a question about it with Claude.

    The document text is read from the module-level ``pdf_content``.

    Args:
        question: The user's question about the document. When it is empty
            or ``None`` only the summary is produced.

    Returns:
        A string of the form ``Summary: <summary>``, followed by a blank
        line and ``Chat Response: <answer>`` when a question was given.
        Returns ``"No PDF uploaded."`` when no document text is available.
    """
    if not pdf_content:
        return "No PDF uploaded."

    model = ChatAnthropic()

    # The PDF text and the question are supplied as template *variables*.
    # Using them directly as template strings makes every "{" or "}" inside
    # them a placeholder, and formatting with no variables then fails.
    summary_prompt = ChatPromptTemplate.from_messages([
        (
            "human",
            "{document}\n\n"
            "Give a clear summary of this pdf information at a 8th grade "
            "reading level.",
        ),
    ])
    # The summary is produced by the same chat model; the previous code
    # called a `pipeline` helper that is not imported anywhere in this file.
    summary = (summary_prompt | model).invoke({"document": pdf_content}).content

    if not question or not question.strip():
        return f"Summary: {summary}"

    answer_prompt = ChatPromptTemplate.from_messages([
        (
            "human",
            "{document}\n\n"
            "Using the document above, answer this question: {question}",
        ),
    ])
    response = (answer_prompt | model).invoke(
        {"document": pdf_content, "question": question}
    )

    return f"Summary: {summary}\n\nChat Response: {response.content}"
|
|
|
|
|
# Location (``.name``) of the upload whose text currently sits in
# ``pdf_content``; ``None`` while nothing has been loaded by the callback.
_loaded_pdf_path = None


def gradio_interface(pdf_doc, question):
    """Gradio callback: make sure the PDF is loaded, then answer the question.

    This function is registered as the ``fn`` of a ``gr.Interface`` whose
    output is plain text, so it always returns a string. It never builds
    another interface object.

    Args:
        pdf_doc: Uploaded file object (its ``.name`` is passed on to
            ``load_pdf``) or ``None`` when no file is selected.
        question: The question typed by the user; may be empty or ``None``.

    Returns:
        The status string from ``load_pdf`` when loading fails or when a
        PDF was just loaded and there is no question yet; otherwise the
        result of ``chat_with_pdf(question)``.
    """
    global pdf_content, _loaded_pdf_path

    upload_path = getattr(pdf_doc, "name", None)
    is_new_upload = upload_path is not None and upload_path != _loaded_pdf_path

    if not pdf_content or is_new_upload:
        # Clear stale text first so that a non-empty pdf_content afterwards
        # can only mean this load succeeded.
        pdf_content = ""
        status = load_pdf(pdf_doc)
        if not pdf_content:
            _loaded_pdf_path = None
            return status
        _loaded_pdf_path = upload_path
        if not question:
            return status

    return chat_with_pdf(question)
|
|
|
if __name__ == "__main__":
    # Guarded so that importing this module does not start a web server.
    # The former bare `gradio_interface(None, None)` call was dropped: its
    # return value was discarded, so it had no effect.
    demo = gr.Interface(
        fn=gradio_interface,
        inputs=[
            # load_pdf reads the upload's location from `.name`, so the File
            # input keeps handing the callback a file object (type="file").
            gr.File(label="Load a pdf", file_types=['.pdf'], type="file"),
            gr.Textbox(label="Ask a question about the PDF"),
        ],
        outputs="text",
        # live=True is intentionally not set: live mode re-runs the callback
        # on every input change, i.e. a model request for each edit of the
        # question box. Without it the user submits explicitly.
        title="Chat with PDF content using Anthropic",
        description="Upload a .PDF and interactively chat about its content.",
    )
    demo.launch()
|
|