Spaces:

richardprobe
/

pdf_upload

Build error

File size: 2,904 Bytes

cb206a1
30d70f9
625af68
30d70f9
625af68
 
30d70f9
 
cb206a1
625af68
 
 
30d70f9
f5be035
 
 
f59bbd9
 
f5be035
 
 
f59bbd9
625af68
24e3d97
625af68
 
 
30d70f9
 
625af68
30d70f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc0a07e
 
24e3d97
30d70f9
 
 
 
 
 
 
625af68
30d70f9
cb206a1
f59bbd9
 
30d70f9
df0d3a5
30d70f9
 
 
f59bbd9
97442a6
f59bbd9
30d70f9

import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain_openai import OpenAI
from dotenv import load_dotenv
import os
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

# Load variables from a local .env file into the process environment before
# any model client is constructed below.
# NOTE(review): presumably this is where the OpenAI API key comes from —
# confirm against the deployment's secrets/config.
load_dotenv()

def extract_text_summary_and_guidance_from_pdf(pdf_file):
    """Extract a PDF's text, summarize it, and pull out forward-looking guidance.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio ``File`` input.
            Either an object exposing the on-disk path as ``.name`` or a plain
            path string is accepted.

    Returns:
        A 3-tuple ``(text, summary, guidance)``: the ``page_content`` of every
        loaded document concatenated with no separator, the summarization
        chain's ``output_text``, and the QA chain's ``output_text`` for the
        guidance prompt.

    Raises:
        ValueError: If ``pdf_file`` is ``None`` (nothing was uploaded).
    """
    # Fail with a clear message instead of an AttributeError on ``None.name``
    # when the function is triggered without an upload.
    if pdf_file is None:
        raise ValueError("No PDF file was uploaded; please select a PDF first.")

    # The upload may arrive as a wrapper object carrying the path in ``.name``
    # or as a plain path string; fall back to the value itself in that case.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Load the PDF file using PyPDFLoader
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Concatenate the text of every loaded document in one pass
    text = "".join(document.page_content for document in documents)

    # Deterministic (temperature=0) chat model shared by both chains
    llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

    # Load the summarization chain and the question-answering chain
    summarize_chain = load_summarize_chain(llm)
    qa_chain = load_qa_chain(llm)

    # Get the summary of the documents
    summary = summarize_chain.invoke({"input_documents": documents})

    # Prompt for the QA chain (sent to the model verbatim, indentation included)
    question = """
    Context Setup:
    You are given an earnings transcript PDF from a company's quarterly earnings call.
    The document contains detailed discussions about the company's financial performance, future outlook, and guidance from executives.

    Task:
    Your task is to extract and summarize the key guidance provided by the company's executives during the earnings call.
    
    Instructions:
    Focus on extracting specific forward-looking statements and guidance provided by the executives.
    Include information about revenue projections, earnings forecasts, strategic initiatives, and any notable remarks about future performance.
    Ignore general commentary, introductions, and routine financial data unless it directly pertains to future guidance.
    Present the extracted information in bullet points.
    If possible, emphasize on forward-looking/expected financial metrics first, then forward-looking/expected business tailwind/headwinds. 
    If possible, incorporate specific numbers along with each guidance point. 
    Ensure every statement must be about expectation of future, or forward-looking statement. 

    Output Format:
    Provide the extracted key guidance in bullet points.

    """

    # Use ``invoke`` (like the summary call above) rather than the deprecated
    # ``run``; the answer text is returned under the "output_text" key.
    qa_result = qa_chain.invoke(
        {"input_documents": documents, "question": question}
    )
    answer = qa_result["output_text"]

    return text, summary["output_text"], answer

# Build the Gradio UI: one PDF upload feeding three text outputs, in the same
# order as the values returned by the handler function.
interface = gr.Interface(
    fn=extract_text_summary_and_guidance_from_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Guidance"),
    ],
    title="PDF Text Extractor, Summarizer, and QA Guidance",
    description=(
        "Upload a PDF file to extract and display its text content, "
        "summary, and QA guidance."
    ),
)

# Start serving the app, requesting a shareable link from Gradio
interface.launch(share=True)