Arslan17121 committed on
Commit
c42ba0a
·
verified ·
1 Parent(s): f3a6a80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -33
app.py CHANGED
@@ -1,30 +1,18 @@
1
  import streamlit as st
 
2
  import PyPDF2
3
  from transformers import pipeline
4
  from gtts import gTTS
5
- from PIL import Image
6
 
7
- # Function to read the PDF and extract text
8
  def extract_text_from_pdf(pdf_file):
9
- pdf_reader = PyPDF2.PdfReader(pdf_file)
 
10
  text = ""
11
- for page_num in range(len(pdf_reader.pages)):
12
- text += pdf_reader.pages[page_num].extract_text()
13
  return text
14
 
15
- # Function to answer questions based on document image
16
- def answer_question_with_docvqa(pdf_file, question):
17
- docvqa_pipeline = pipeline(
18
- "document-question-answering",
19
- model="google/pix2struct-docvqa-large"
20
- )
21
- answers = []
22
- for page_num in range(len(pdf_file.pages)):
23
- page = pdf_file.pages[page_num]
24
- page_image = page.to_image() # Convert PDF page to image if possible
25
- answers.append(docvqa_pipeline(image=Image.open(page_image), question=question))
26
- return answers
27
-
28
  # Function to generate discussion points
29
  def generate_discussion_points(text):
30
  summarizer = pipeline('summarization')
@@ -41,28 +29,18 @@ st.title("PDF Analysis and Discussion Generator")
41
  uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
42
 
43
  if uploaded_file is not None:
44
- # Load PDF for processing
45
- pdf_reader = PyPDF2.PdfReader(uploaded_file)
46
-
47
- # Extract and display text content
48
- text = extract_text_from_pdf(pdf_reader)
49
  st.subheader("Extracted Text")
50
  st.write(text)
51
 
52
- # Question answering functionality
53
- st.subheader("Ask Questions About the Document")
54
- user_question = st.text_input("Enter your question:")
55
- if user_question:
56
- answers = answer_question_with_docvqa(pdf_reader, user_question)
57
- st.write("Answer:", answers)
58
-
59
  # Generate and display discussion points
60
- discussion_points = generate_discussion_points(text)
61
  st.subheader("Generated Discussion Points")
 
62
  st.write(discussion_points)
63
 
64
- # Convert discussion points to audio
65
  text_to_speech(discussion_points)
66
  audio_file = open("discussion_points.mp3", "rb")
67
  audio_bytes = audio_file.read()
68
- st.audio(audio_bytes, format='audio/mp3')
 
1
  import streamlit as st
2
+ import io
3
  import PyPDF2
4
  from transformers import pipeline
5
  from gtts import gTTS
 
6
 
7
+ # Function to extract text from a PDF
8
  def extract_text_from_pdf(pdf_file):
9
+ pdf_stream = io.BytesIO(pdf_file.read())
10
+ pdf_reader = PyPDF2.PdfReader(pdf_stream)
11
  text = ""
12
+ for page in pdf_reader.pages:
13
+ text += page.extract_text() or "" # Handle None for non-text pages
14
  return text
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Function to generate discussion points
17
  def generate_discussion_points(text):
18
  summarizer = pipeline('summarization')
 
29
  uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
30
 
31
  if uploaded_file is not None:
32
+ # Extract text from the uploaded PDF
33
+ text = extract_text_from_pdf(uploaded_file)
 
 
 
34
  st.subheader("Extracted Text")
35
  st.write(text)
36
 
 
 
 
 
 
 
 
37
  # Generate and display discussion points
 
38
  st.subheader("Generated Discussion Points")
39
+ discussion_points = generate_discussion_points(text)
40
  st.write(discussion_points)
41
 
42
+ # Convert discussion points to audio and play it
43
  text_to_speech(discussion_points)
44
  audio_file = open("discussion_points.mp3", "rb")
45
  audio_bytes = audio_file.read()
46
+ st.audio(audio_bytes, format="audio/mp3")