Richard Hsu committed on
Commit
30d70f9
·
1 Parent(s): 625af68
Files changed (2) hide show
  1. app.py +37 -13
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,14 +1,16 @@
1
  import gradio as gr
2
- from langchain.document_loaders import PyPDFLoader
3
  from langchain.chains.summarize import load_summarize_chain
4
- from langchain.llms import OpenAI
5
  from dotenv import load_dotenv
6
  import os
 
 
7
 
8
  # Load environment variables from .env file
9
  load_dotenv()
10
 
11
- def extract_text_and_summary_from_pdf(pdf_file):
12
  # Load the PDF file using PyPDFLoader
13
  loader = PyPDFLoader(pdf_file.name)
14
  documents = loader.load()
@@ -19,25 +21,47 @@ def extract_text_and_summary_from_pdf(pdf_file):
19
  text += document.page_content
20
 
21
  # Initialize the OpenAI model with the API key from environment variables
22
- openai_api_key = os.getenv("OPENAI_API_KEY")
23
- llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key)
24
 
25
  # Load the summarization chain
26
  summarize_chain = load_summarize_chain(llm)
27
-
 
28
  # Get the summary of the text
29
- summary = summarize_chain.run(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- return text, summary
32
 
33
  # Create a Gradio interface
34
  interface = gr.Interface(
35
- fn=extract_text_and_summary_from_pdf,
36
  inputs=gr.File(label="Upload PDF"),
37
- outputs=[gr.Textbox(label="Extracted Text"), gr.Textbox(label="Summary")],
38
- title="PDF Text Extractor and Summarizer",
39
- description="Upload a PDF file to extract and display its text content and summary."
40
  )
41
 
42
  # Launch the interface
43
- interface.launch(share=True)
 
1
  import gradio as gr
2
+ from langchain_community.document_loaders import PyPDFLoader
3
  from langchain.chains.summarize import load_summarize_chain
4
+ from langchain_openai import OpenAI
5
  from dotenv import load_dotenv
6
  import os
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.chains.question_answering import load_qa_chain
9
 
10
  # Load environment variables from .env file
11
  load_dotenv()
12
 
13
+ def extract_text_summary_and_guidance_from_pdf(pdf_file):
14
  # Load the PDF file using PyPDFLoader
15
  loader = PyPDFLoader(pdf_file.name)
16
  documents = loader.load()
 
21
  text += document.page_content
22
 
23
  # Initialize the OpenAI model with the API key from environment variables
24
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
 
25
 
26
  # Load the summarization chain
27
  summarize_chain = load_summarize_chain(llm)
28
+ qa_chain = load_qa_chain(llm)
29
+
30
  # Get the summary of the text
31
+ summary = summarize_chain.invoke(documents)
32
+
33
+ # Get the QA chain answer
34
+ question = """
35
+ Context Setup:
36
+ You are given an earnings transcript PDF from a company's quarterly earnings call.
37
+ The document contains detailed discussions about the company's financial performance, future outlook, and guidance from executives.
38
+
39
+ Task:
40
+ Your task is to extract and summarize the key guidance provided by the company's executives during the earnings call.
41
+
42
+ Instructions:
43
+ Focus on extracting specific forward-looking statements and guidance provided by the executives.
44
+ Include information about revenue projections, earnings forecasts, strategic initiatives, and any notable remarks about future performance.
45
+ Ignore general commentary, introductions, and routine financial data unless it directly pertains to future guidance.
46
+ Present the extracted information in bullet points.
47
+
48
+ Output Format:
49
+ Provide the extracted key guidance in bullet points.
50
+
51
+ """
52
+
53
+ answer = qa_chain.run(input_documents=documents, question=question)
54
 
55
+ return text, summary['output_text'], answer
56
 
57
  # Create a Gradio interface
58
  interface = gr.Interface(
59
+ fn=extract_text_summary_and_guidance_from_pdf,
60
  inputs=gr.File(label="Upload PDF"),
61
+ outputs=[gr.Textbox(label="Extracted Text"), gr.Textbox(label="Summary"), gr.Textbox(label="Guidance")],
62
+ title="PDF Text Extractor, Summarizer, and QA Guidance",
63
+ description="Upload a PDF file to extract and display its text content, summary, and QA guidance."
64
  )
65
 
66
  # Launch the interface
67
+ interface.launch(share=True)
requirements.txt CHANGED
@@ -69,4 +69,5 @@ yarl==1.9.2
69
  pypdf==3.10.0
70
  pypdf2
71
  python-dotenv
72
- openai
 
 
69
  pypdf==3.10.0
70
  pypdf2
71
  python-dotenv
72
+ openai
73
+ langchain-community