Spaces:
Build error
Build error
import gradio as gr | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.chains.summarize import load_summarize_chain | |
from langchain_openai import OpenAI | |
from dotenv import load_dotenv | |
import os | |
from langchain.chat_models import ChatOpenAI | |
from langchain.chains.question_answering import load_qa_chain | |
# Pull settings from a local .env file into the process environment before
# anything below reads them.
load_dotenv()
# Question handed to the QA chain. Kept at module level so the function body
# stays readable; the text itself is what steers the model towards
# forward-looking guidance only.
_GUIDANCE_QUESTION = """
Context Setup:
You are given an earnings transcript PDF from a company's quarterly earnings call.
The document contains detailed discussions about the company's financial performance, future outlook, and guidance from executives.
Task:
Your task is to extract and summarize the key guidance provided by the company's executives during the earnings call.
Instructions:
Focus on extracting specific forward-looking statements and guidance provided by the executives.
Include information about revenue projections, earnings forecasts, strategic initiatives, and any notable remarks about future performance.
Ignore general commentary, introductions, and routine financial data unless it directly pertains to future guidance.
Present the extracted information in bullet points.
If possible, emphasize on forward-looking/expected financial metrics first, then forward-looking/expected business tailwind/headwinds.
If possible, incorporate specific numbers along with each guidance point.
Ensure every statement must be about expectation of future, or forward-looking statement.
Output Format:
Provide the extracted key guidance in bullet points.
"""


def extract_text_summary_and_guidance_from_pdf(pdf_file):
    """Extract the text of a PDF and produce a summary and guidance bullets.

    Args:
        pdf_file: The uploaded PDF. Either an object exposing the file path
            as ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        A 3-tuple of strings ``(text, summary, guidance)``: the concatenated
        page contents, the summarization chain's ``output_text``, and the
        question-answering chain's ``output_text``. When ``pdf_file`` is
        ``None`` all three are empty strings.
    """
    # Nothing uploaded: return empty outputs instead of failing with an
    # AttributeError on `None.name`.
    if pdf_file is None:
        return "", "", ""

    # Accept both an upload object carrying its path in `.name` and a bare
    # path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Load the PDF and join the content of every loaded document.
    documents = PyPDFLoader(pdf_path).load()
    text = "".join(document.page_content for document in documents)

    # No API key is passed explicitly here, so the client is expected to pick
    # it up from the environment.
    llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
    summarize_chain = load_summarize_chain(llm)
    qa_chain = load_qa_chain(llm)

    # Both chains are called through `invoke` with explicit input keys, and
    # the generated text is read from the "output_text" entry of the result.
    summary = summarize_chain.invoke({"input_documents": documents})
    guidance = qa_chain.invoke(
        {"input_documents": documents, "question": _GUIDANCE_QUESTION}
    )

    return text, summary["output_text"], guidance["output_text"]
# Assemble the web UI: a single PDF upload feeds the handler, and each of the
# three strings it returns is shown in its own labelled text box.
interface = gr.Interface(
    fn=extract_text_summary_and_guidance_from_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.Textbox(label=box_label)
        for box_label in ("Extracted Text", "Summary", "Guidance")
    ],
    title="PDF Text Extractor, Summarizer, and QA Guidance",
    description="Upload a PDF file to extract and display its text content, summary, and QA guidance.",
)

# Start serving the UI, requesting a shareable link from Gradio.
interface.launch(share=True)