Spaces:

Arslan17121
/

NotebookCwithqa

Sleeping

App Files Files Community

Arslan17121 committed on Jan 4

Commit

4751360

verified ·

1 Parent(s): d7ba597

Create app.py

Browse files

Files changed (1) hide show

app.py +68 -0

app.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import streamlit as st
+import PyPDF2
+from transformers import pipeline
+from gtts import gTTS
+from PIL import Image
+# Function to read the PDF and extract text
+def extract_text_from_pdf(pdf_file):
+    """Return the concatenated text of every page in a PDF.
+
+    Args:
+        pdf_file: Either an already-open reader (any object exposing a
+            ``pages`` sequence, such as ``PyPDF2.PdfReader``) or a value
+            that ``PyPDF2.PdfReader`` accepts (a path or a binary file
+            object).
+
+    Returns:
+        The text of all pages joined in page order. A page whose
+        ``extract_text()`` yields nothing contributes an empty string.
+    """
+    # The app hands over a reader it has already built; wrapping a reader
+    # in a second PdfReader fails, so only construct one for raw inputs.
+    if hasattr(pdf_file, "pages"):
+        pdf_reader = pdf_file
+    else:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
+# Function to answer questions based on document image
+def answer_question_with_docvqa(pdf_file, question):
+    """Ask one question of every page of a PDF with a document-QA pipeline.
+
+    Args:
+        pdf_file: Object exposing a ``pages`` sequence; the app passes a
+            ``PyPDF2.PdfReader``.
+        question: Question string forwarded to the pipeline for each page.
+
+    Returns:
+        A list holding the pipeline's result for each page, in page order.
+    """
+    # The pipeline is constructed anew on every call.
+    docvqa_pipeline = pipeline(
+        "document-question-answering",
+        model="google/pix2struct-docvqa-large"
+    )
+    answers = []
+    for page_num in range(len(pdf_file.pages)):
+        page = pdf_file.pages[page_num]
+        # NOTE(review): the caller passes a PyPDF2 reader, and PyPDF2 page
+        # objects do not appear to provide to_image() (it looks like the
+        # pdfplumber API) -- confirm; as written this call likely raises
+        # AttributeError.
+        page_image = page.to_image()  # Convert PDF page to image if possible
+        # NOTE(review): Image.open expects a path or file object -- verify
+        # that whatever to_image() returns is acceptable here.
+        answers.append(docvqa_pipeline(image=Image.open(page_image), question=question))
+    return answers
+# Function to generate discussion points
+def generate_discussion_points(text):
+    """Summarize the document text into discussion points.
+
+    Args:
+        text: Source text to summarize.
+
+    Returns:
+        The ``summary_text`` of the first result returned by the
+        summarization pipeline.
+    """
+    summarizer = pipeline('summarization')
+    # Text extracted from a whole PDF routinely exceeds the model's input
+    # window; truncation=True clips the input instead of letting the call
+    # fail on long documents.
+    summary = summarizer(
+        text,
+        max_length=600,
+        min_length=300,
+        do_sample=False,
+        truncation=True,
+    )
+    return summary[0]['summary_text']
+# Function to convert text to speech
+def text_to_speech(text, output_path="discussion_points.mp3", lang='en'):
+    """Synthesize speech for ``text`` with gTTS and save it as an MP3 file.
+
+    Args:
+        text: Text to be spoken.
+        output_path: Destination file; defaults to the name the app reads
+            back, so existing single-argument calls behave as before.
+        lang: Language code handed to gTTS; defaults to English.
+
+    Returns:
+        The path the audio was written to.
+    """
+    tts = gTTS(text=text, lang=lang)
+    tts.save(output_path)
+    return output_path
+# Streamlit app
+st.title("PDF Analysis and Discussion Generator")
+uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+if uploaded_file is not None:
+    # Extract and display text content. The upload itself is passed because
+    # the extractor takes the raw file, not a ready-made reader.
+    text = extract_text_from_pdf(uploaded_file)
+    st.subheader("Extracted Text")
+    st.write(text)
+    # Question answering functionality
+    st.subheader("Ask Questions About the Document")
+    user_question = st.text_input("Enter your question:")
+    if user_question:
+        # The QA helper walks reader.pages, so it needs a PdfReader; rewind
+        # first because the text extraction above already consumed the stream.
+        uploaded_file.seek(0)
+        pdf_reader = PyPDF2.PdfReader(uploaded_file)
+        answers = answer_question_with_docvqa(pdf_reader, user_question)
+        st.write("Answer:", answers)
+    if text.strip():
+        # Generate and display discussion points
+        discussion_points = generate_discussion_points(text)
+        st.subheader("Generated Discussion Points")
+        st.write(discussion_points)
+        # Convert discussion points to audio
+        text_to_speech(discussion_points)
+        # Read the MP3 inside a context manager so the handle is closed on
+        # every rerun of the script.
+        with open("discussion_points.mp3", "rb") as audio_file:
+            audio_bytes = audio_file.read()
+        st.audio(audio_bytes, format='audio/mp3')
+    else:
+        # Scanned or image-only PDFs yield no text; there is nothing to
+        # summarize or speak in that case.
+        st.warning("No extractable text was found in this PDF, so no discussion points were generated.")