"""Gradio app: extract a PDF's text, summarize it, and pull out forward guidance."""

import os

import gradio as gr
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# Prompt sent to the question-answering chain. The text is kept verbatim
# (including its leading/trailing spaces and single embedded newline) so the
# model receives exactly the same question as before; only the Python layout
# of the literal has changed.
GUIDANCE_QUESTION = (
    " Context Setup: You are given an earnings transcript PDF from a "
    "company's quarterly earnings call. The document contains detailed "
    "discussions about the company's financial performance, future outlook, "
    "and guidance from executives. "
    "Task: Your task is to extract and summarize the key guidance provided "
    "by the company's executives during the earnings call. "
    "Instructions: Focus on extracting specific forward-looking statements "
    "and guidance provided by the executives. "
    "Include information about revenue projections, earnings forecasts, "
    "strategic initiatives, and any notable remarks about future "
    "performance. "
    "Ignore general commentary, introductions, and routine financial data "
    "unless it directly pertains to future guidance. "
    "Present the extracted information in bullet points. "
    "If possible, emphasize on forward-looking/expected financial metrics "
    "first, then forward-looking/expected business tailwind/headwinds. "
    "If possible, incorporate specific numbers along with each guidance "
    "point. \n"
    "Ensure every statement must be about expectation of future, or "
    "forward-looking statement. "
    "Output Format: Provide the extracted key guidance in bullet points. "
)


def extract_text_summary_and_guidance_from_pdf(pdf_file):
    """Return the raw text, a summary, and the extracted guidance for a PDF.

    Args:
        pdf_file: The uploaded file as handed over by the ``gr.File`` input
            (an object exposing the on-disk path as ``.name``), or a plain
            path given as ``str`` / ``os.PathLike``.

    Returns:
        A 3-tuple ``(text, summary, guidance)`` of strings: the concatenated
        page contents, the summarization chain's ``output_text``, and the
        question-answering chain's ``output_text`` for ``GUIDANCE_QUESTION``.

    Raises:
        gr.Error: If no file was provided, so the UI shows a readable message
            instead of an ``AttributeError`` on ``None``.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file first.")

    # Plain paths are used as-is; upload wrappers carry the path in `.name`.
    # The str/PathLike check comes first because `Path.name` would only give
    # the final path component.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    # Load the PDF file using PyPDFLoader
    documents = PyPDFLoader(pdf_path).load()

    # Concatenate the text of every loaded document, with no separator.
    text = "".join(document.page_content for document in documents)

    # Chat model used by both chains; temperature 0 is requested for both.
    llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
    summarize_chain = load_summarize_chain(llm)
    qa_chain = load_qa_chain(llm)

    # Both chains are called through `invoke` with explicit input dicts and
    # their answer is read from the "output_text" key.
    summary = summarize_chain.invoke({"input_documents": documents})
    guidance = qa_chain.invoke(
        {"input_documents": documents, "question": GUIDANCE_QUESTION}
    )

    return text, summary["output_text"], guidance["output_text"]


# Create a Gradio interface
interface = gr.Interface(
    fn=extract_text_summary_and_guidance_from_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Guidance"),
    ],
    title="PDF Text Extractor, Summarizer, and QA Guidance",
    description=(
        "Upload a PDF file to extract and display its text content, "
        "summary, and QA guidance."
    ),
)

# Launch only when executed as a script, so importing this module (e.g. to
# reuse the function or the `interface` object) does not start a server with
# a public share link as a side effect.
if __name__ == "__main__":
    interface.launch(share=True)