Arslan17121 committed on
Commit
4751360
·
verified ·
1 Parent(s): d7ba597

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ from transformers import pipeline
4
+ from gtts import gTTS
5
+ from PIL import Image
6
+
7
# Function to read the PDF and extract text
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Either an already-constructed reader (any object exposing
            a ``pages`` sequence, such as ``PyPDF2.PdfReader``) or anything
            ``PyPDF2.PdfReader`` itself accepts (a path or a binary
            file-like object).

    Returns:
        The text of all pages joined in page order, or ``""`` when the
        document has no pages or no extractable text.
    """
    # A reader must not be wrapped in a second PdfReader, so only build one
    # when the argument is not already reader-like.
    if hasattr(pdf_file, "pages"):
        reader = pdf_file
    else:
        reader = PyPDF2.PdfReader(pdf_file)

    # Treat a falsy extract_text() result (e.g. None or "") as empty text so
    # a single image-only page cannot break the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
14
+
15
# Function to answer questions based on document image
def answer_question_with_docvqa(pdf_file, question):
    """Run document visual question answering over the images in a PDF.

    PyPDF2 cannot rasterise a page (its page objects have no ``to_image``),
    so the images embedded in each page are used instead. This suits scanned
    documents; pages that contain no embedded image contribute no answer.

    Args:
        pdf_file: A ``PyPDF2.PdfReader``-like object exposing ``pages``, where
            each page exposes ``images`` (requires a PyPDF2 release that
            provides ``PageObject.images``).
        question: The natural-language question to ask about each image.

    Returns:
        A list with one pipeline result per embedded image, in page order.
        The list is empty when the document contains no embedded images.
    """
    import io  # local import: only needed to wrap raw image bytes for PIL

    docvqa_pipeline = pipeline(
        "document-question-answering",
        model="google/pix2struct-docvqa-large",
    )
    answers = []
    for page in pdf_file.pages:
        for embedded in page.images:
            # Image.open needs a path or file object, so wrap the raw bytes.
            # The pipeline is called inside the ``with`` block because PIL
            # loads pixel data lazily from the underlying stream.
            with Image.open(io.BytesIO(embedded.data)) as page_image:
                answers.append(
                    docvqa_pipeline(image=page_image, question=question)
                )
    return answers
27
+
28
# Function to generate discussion points
def generate_discussion_points(text):
    """Summarise *text* into discussion points.

    Args:
        text: The document text to summarise.

    Returns:
        The generated summary string, or ``""`` when *text* is empty or
        contains only whitespace (nothing to summarise).
    """
    # Skip the model entirely for empty input instead of letting the
    # summarizer fail on it.
    if not text or not text.strip():
        return ""

    summarizer = pipeline('summarization')
    # truncation=True clips inputs longer than the model's maximum input
    # length instead of raising on long documents.
    summary = summarizer(
        text,
        max_length=600,
        min_length=300,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
33
+
34
# Function to convert text to speech
def text_to_speech(text, lang='en', output_path="discussion_points.mp3"):
    """Synthesise *text* to an MP3 file with gTTS.

    Args:
        text: The text to speak.
        lang: Language code passed to gTTS. Defaults to ``'en'``.
        output_path: Where the MP3 is written. Defaults to
            ``"discussion_points.mp3"`` in the current working directory.

    Returns:
        The path the audio was saved to (``output_path``).
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(output_path)
    return output_path
38
+
39
# Streamlit app: upload a PDF, show its text, answer questions about it, and
# produce spoken discussion points.
st.title("PDF Analysis and Discussion Generator")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Load PDF for processing (used by the question-answering step)
    pdf_reader = PyPDF2.PdfReader(uploaded_file)

    # Extract and display text content. Hand over the raw upload rather than
    # the reader: a PdfReader cannot be wrapped in another PdfReader.
    uploaded_file.seek(0)
    text = extract_text_from_pdf(uploaded_file)
    st.subheader("Extracted Text")
    st.write(text)

    # Question answering functionality
    st.subheader("Ask Questions About the Document")
    user_question = st.text_input("Enter your question:")
    if user_question:
        answers = answer_question_with_docvqa(pdf_reader, user_question)
        st.write("Answer:", answers)

    if text.strip():
        # Generate and display discussion points
        discussion_points = generate_discussion_points(text)
        st.subheader("Generated Discussion Points")
        st.write(discussion_points)

        # Convert discussion points to audio; the context manager closes the
        # MP3 file handle once its bytes have been read.
        text_to_speech(discussion_points)
        with open("discussion_points.mp3", "rb") as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format='audio/mp3')
    else:
        # Nothing to summarise or speak (e.g. an image-only PDF).
        st.warning(
            "No extractable text was found in this PDF, so no discussion "
            "points were generated."
        )