Richard Hsu committed on
Commit
625af68
·
1 Parent(s): f5be035
Files changed (3) hide show
  1. .env +1 -0
  2. app.py +23 -6
  3. requirements.txt +3 -1
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=sk-proj-opQQ4OzmiZ0C6AB71uOfT3BlbkFJBgC8hdxz2ZaEP3gXryMC
app.py CHANGED
@@ -1,7 +1,14 @@
1
  import gradio as gr
2
  from langchain.document_loaders import PyPDFLoader
 
 
 
 
3
 
4
- def extract_text_from_pdf(pdf_file):
 
 
 
5
  # Load the PDF file using PyPDFLoader
6
  loader = PyPDFLoader(pdf_file.name)
7
  documents = loader.load()
@@ -11,15 +18,25 @@ def extract_text_from_pdf(pdf_file):
11
  for document in documents:
12
  text += document.page_content
13
 
14
- return text
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Create a Gradio interface
17
  interface = gr.Interface(
18
- fn=extract_text_from_pdf,
19
  inputs=gr.File(label="Upload PDF"),
20
- outputs=gr.Textbox(label="Extracted Text"),
21
- title="PDF Text Extractor",
22
- description="Upload a PDF file to extract and display its text content."
23
  )
24
 
25
  # Launch the interface
 
1
  import gradio as gr
2
  from langchain.document_loaders import PyPDFLoader
3
+ from langchain.chains.summarize import load_summarize_chain
4
+ from langchain.llms import OpenAI
5
+ from dotenv import load_dotenv
6
+ import os
7
 
8
+ # Load environment variables from .env file
9
+ load_dotenv()
10
+
11
+ def extract_text_and_summary_from_pdf(pdf_file):
12
  # Load the PDF file using PyPDFLoader
13
  loader = PyPDFLoader(pdf_file.name)
14
  documents = loader.load()
 
18
  for document in documents:
19
  text += document.page_content
20
 
21
+ # Initialize the OpenAI model with the API key from environment variables
22
+ openai_api_key = os.getenv("OPENAI_API_KEY")
23
+ llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key)
24
+
25
+ # Load the summarization chain
26
+ summarize_chain = load_summarize_chain(llm)
27
+
28
+ # Get the summary of the text
29
+ summary = summarize_chain.run(text)
30
+
31
+ return text, summary
32
 
33
  # Create a Gradio interface
34
  interface = gr.Interface(
35
+ fn=extract_text_and_summary_from_pdf,
36
  inputs=gr.File(label="Upload PDF"),
37
+ outputs=[gr.Textbox(label="Extracted Text"), gr.Textbox(label="Summary")],
38
+ title="PDF Text Extractor and Summarizer",
39
+ description="Upload a PDF file to extract and display its text content and summary."
40
  )
41
 
42
  # Launch the interface
requirements.txt CHANGED
@@ -67,4 +67,6 @@ uvicorn==0.22.0
67
  websockets==11.0.3
68
  yarl==1.9.2
69
  pypdf==3.10.0
70
- pypdf2
 
 
 
67
  websockets==11.0.3
68
  yarl==1.9.2
69
  pypdf==3.10.0
70
+ pypdf2
71
+ python-dotenv
72
+ openai