Spaces:
Build error
Build error
File size: 2,904 Bytes
cb206a1 30d70f9 625af68 30d70f9 625af68 30d70f9 cb206a1 625af68 30d70f9 f5be035 f59bbd9 f5be035 f59bbd9 625af68 24e3d97 625af68 30d70f9 625af68 30d70f9 bc0a07e 24e3d97 30d70f9 625af68 30d70f9 cb206a1 f59bbd9 30d70f9 df0d3a5 30d70f9 f59bbd9 97442a6 f59bbd9 30d70f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain_openai import OpenAI
from dotenv import load_dotenv
import os
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
# Load variables from a local .env file into the process environment before
# anything below reads them (presumably the OpenAI API key used by the model
# client -- confirm against the deployment's secrets).
load_dotenv()
def extract_text_summary_and_guidance_from_pdf(pdf_file):
    """Extract a PDF's text, summarize it, and list the forward guidance.

    Args:
        pdf_file: The upload handed over by the Gradio ``File`` input. Either
            a path (``str`` / ``os.PathLike``) or an object exposing the
            on-disk path as ``.name`` is accepted. ``None`` means nothing was
            uploaded.

    Returns:
        A 3-tuple of strings ``(text, summary, guidance)``: the concatenated
        page text, the summarization chain's output, and the QA chain's
        answer to the guidance-extraction prompt. When ``pdf_file`` is
        ``None`` the tuple is ``("No PDF file was uploaded.", "", "")`` and no
        model call is made.
    """
    if pdf_file is None:
        # Submitting the form without a file yields None; report that in the
        # first output instead of failing on ``None.name``.
        return "No PDF file was uploaded.", "", ""

    # Accept a bare path as well as an upload wrapper whose path is ``.name``.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    # Load the PDF file using PyPDFLoader
    documents = PyPDFLoader(pdf_path).load()

    # Concatenate the text of every loaded document in one pass.
    text = "".join(document.page_content for document in documents)

    # temperature=0 keeps the output as repeatable as the model allows.
    llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

    # Both chains receive the loaded documents, not the flattened text.
    summarize_chain = load_summarize_chain(llm)
    qa_chain = load_qa_chain(llm)

    # Get the summary of the text
    summary = summarize_chain.invoke({"input_documents": documents})

    # Prompt for the QA chain; kept verbatim because its wording steers the
    # model's answer.
    question = """
Context Setup:
You are given an earnings transcript PDF from a company's quarterly earnings call.
The document contains detailed discussions about the company's financial performance, future outlook, and guidance from executives.
Task:
Your task is to extract and summarize the key guidance provided by the company's executives during the earnings call.
Instructions:
Focus on extracting specific forward-looking statements and guidance provided by the executives.
Include information about revenue projections, earnings forecasts, strategic initiatives, and any notable remarks about future performance.
Ignore general commentary, introductions, and routine financial data unless it directly pertains to future guidance.
Present the extracted information in bullet points.
If possible, emphasize on forward-looking/expected financial metrics first, then forward-looking/expected business tailwind/headwinds.
If possible, incorporate specific numbers along with each guidance point.
Ensure every statement must be about expectation of future, or forward-looking statement.
Output Format:
Provide the extracted key guidance in bullet points.
"""

    # Use ``invoke`` (as the summary chain does) rather than the deprecated
    # ``run``; the answer is stored under the chain's "output_text" key.
    qa_result = qa_chain.invoke(
        {"input_documents": documents, "question": question}
    )
    answer = qa_result["output_text"]

    return text, summary["output_text"], answer
# Build the Gradio UI: one PDF upload as input and three text boxes as output,
# in the same order as the tuple returned by
# extract_text_summary_and_guidance_from_pdf (text, summary, guidance).
interface = gr.Interface(
    fn=extract_text_summary_and_guidance_from_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Guidance"),
    ],
    title="PDF Text Extractor, Summarizer, and QA Guidance",
    description="Upload a PDF file to extract and display its text content, summary, and QA guidance.",
)

# Launch the interface; share=True additionally requests a public share link.
interface.launch(share=True)