Spaces:

eskayML
/

Research-Paper-to-Audio

Runtime error

App Files Files Community

eskayML committed on Jan 22, 2024

Commit

a2ec50f

verified ·

1 Parent(s): 1894bb5

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
app.py +93 -0
hella swag paper official.pdf +3 -0
sklistener-icon.jpg +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+hella[[:space:]]swag[[:space:]]paper[[:space:]]official.pdf filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import streamlit as st
+import pyttsx3
+import tempfile
+import PyPDF2
+from huggingface_hub import InferenceClient
+# Custom CSS that gives the whole app a light grey vertical gradient background.
+page_bg_img = """
+<style>
+.stApp {
+background: linear-gradient(  #eee 38%, #ccc 68%);
+}
+</style>
+"""
+# Inject the stylesheet; unsafe_allow_html lets the raw <style> tag through.
+st.markdown(page_bg_img, unsafe_allow_html=True)
+st.title("Summarize & Listen to your Academic Materials on the Fly.")
+# Stays None until the user has uploaded a PDF.
+uploaded_pdf = st.file_uploader("Upload a research Paper", type="pdf")
+# Text extracted from the uploaded PDF; None while nothing has been extracted.
+full_text = None
+# Model identifier handed to the Hugging Face inference client used for summarization.
+MODEL_NAME = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
+client = InferenceClient(MODEL_NAME)
+# Instruction preamble for the summarizer. The extracted paper text is appended
+# directly after this string when the generation request is built.
+# NOTE(review): the <INST>...</INST> tags are sent to the model verbatim as part
+# of the prompt - confirm they match the prompt format this model expects.
+DETAILED_SUMMARIZATION_PROMPT = """
+<INST>You are a very powerful summarization engine for summarizing academic contents,
+now you are to  summarize the following text you are going to be provided which is from a document, make sure to understand
+all improperly parsed text and actually parse them properly , also make sure that your final summarization is very coherent and understandable by a student and is under 4000 words ,
+ also the length of the summarized text should be less than the original provided text,
+ if you are provided with a text that includes unnecessary items that do not contribute value to the book like preface about the author, do not include them in the summarization
+  Your summary should be concise and should accurately and objectively communicate the key points of the paper.
+   You should not include any personal opinions or interpretations in your summary but rather focus on
+   objectively presenting the information from the paper. Your summary should be written in your own words
+   and should not include any direct quotes from the paper. Please ensure that your summary is clear,
+   concise, and accurately reflects the content of the original paper.
+ do not go out of context of the words provided.
+ Now here is your provided text :
+</INST>
+"""
+with st.spinner("Extracting Text..."):
+    if uploaded_pdf is not None:
+        tfile = tempfile.NamedTemporaryFile(delete=False)
+        tfile.write(uploaded_pdf.read())
+        with open(tfile.name, "rb") as pdf_file:
+            pdf_reader = PyPDF2.PdfReader(pdf_file)
+            num_pages = len(pdf_reader.pages)
+            # Get text from all pages
+            full_text = ""
+            for page_num in range(num_pages):
+                page = pdf_reader.pages[page_num]
+                page_text = page.extract_text()
+                full_text += page_text
+            # truncating the full text at 25k characters
+            full_text = full_text if len(full_text) < 100000 else full_text[:100000]
+            # print(full_text)
+            st.success("Text Extracted Successfully!!!")
+###################################################################################
+def synthesize_text_to_audio(text):
+    engine = pyttsx3.init()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
+        temp_file_path = temp_file.name
+        engine.save_to_file(text, temp_file_path)  # Save the audio to a temporary file
+    engine.runAndWait()
+    sound_file = open(temp_file_path, "rb")  # Open the saved audio file for reading
+    return sound_file
+summarized_text = None
+if full_text:
+    with st.spinner("Summarizing Text Content..."):
+        summarized_text = client.text_generation(
+            DETAILED_SUMMARIZATION_PROMPT + full_text,
+            max_new_tokens=4096,
+            temperature=0.2,
+            top_p=0.8,
+        )
+        print(summarized_text)
+if summarized_text:
+    with st.spinner('Synthesizing to Audio...'):
+        st.audio(synthesize_text_to_audio(summarized_text))

hella swag paper official.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a042fdcc4a22848cffde37e9c5257e423443980172bab7367dffb4e779aac37
+size 1192712

sklistener-icon.jpg ADDED Viewed