Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- .gitattributes +1 -0
- app.py +93 -0
- hella swag paper official.pdf +3 -0
- sklistener-icon.jpg +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
hella[[:space:]]swag[[:space:]]paper[[:space:]]official.pdf filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pyttsx3
|
3 |
+
import tempfile
|
4 |
+
import PyPDF2
|
5 |
+
from huggingface_hub import InferenceClient
|
6 |
+
|
7 |
+
# --- Static configuration -------------------------------------------------

# Hugging Face model id used for summarization.
MODEL_NAME = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"

# Instruction block that is prepended to the extracted document text before
# it is sent to the model. This is runtime text: do not reformat it.
DETAILED_SUMMARIZATION_PROMPT = """
<INST>You are a very powerful summarization engine for summarizing academic contents,
now you are to summarize the following text you are going to be provided which is from a document, make sure to understand
all improperly parsed text and actually parse them properly , also make sure that your final summarization is very coherent and understandable by a student and is under 4000 words ,
also the length of the summarized text should be less than the original provided text,
if you are provided with a text that includes unnecessary items that do not contribute value to the book like preface about the author, do not include them in the summarization

Your summary should be concise and should accurately and objectively communicate the key points of the paper.
You should not include any personal opinions or interpretations in your summary but rather focus on
objectively presenting the information from the paper. Your summary should be written in your own words
and should not include any direct quotes from the paper. Please ensure that your summary is clear,
concise, and accurately reflects the content of the original paper.
do not go out of context of the words provided.
Now here is your provided text :
</INST>
"""

# Inline stylesheet that gives the app container a grey gradient background.
page_bg_img = """
<style>
.stApp {
background: linear-gradient( #eee 38%, #ccc 68%);
}
</style>
"""

# --- Page scaffolding -----------------------------------------------------

# unsafe_allow_html is required so the <style> tag is emitted as markup.
st.markdown(page_bg_img, unsafe_allow_html=True)

st.title("Summarize & Listen to your Academic Materials on the Fly.")

# The uploader yields None until the user has provided a PDF.
uploaded_pdf = st.file_uploader("Upload a research Paper", type="pdf")

# Filled in by the extraction step once a PDF is available.
full_text = None

# Inference client bound to the summarization model.
client = InferenceClient(MODEL_NAME)
|
41 |
+
|
42 |
+
|
43 |
+
# Upper bound on the number of characters forwarded to the summarizer.
MAX_TEXT_CHARS = 100000

with st.spinner("Extracting Text..."):
    if uploaded_pdf is not None:
        # `uploaded_pdf` is a file-like object, so the reader can parse it
        # directly. This avoids copying it into a temporary file that was
        # never flushed, closed or deleted before being reopened.
        pdf_reader = PyPDF2.PdfReader(uploaded_pdf)

        # Get text from all pages; a page that yields no text contributes "".
        full_text = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

        # Truncate the full text at 100,000 characters (slicing is a no-op
        # for shorter documents).
        full_text = full_text[:MAX_TEXT_CHARS]

        if full_text:
            st.success("Text Extracted Successfully!!!")
        else:
            # Nothing to summarize, e.g. a PDF with no extractable text
            # layer; the later steps are skipped because full_text is empty.
            st.warning("No extractable text was found in the uploaded PDF.")
|
62 |
+
|
63 |
+
|
64 |
+
###################################################################################
|
65 |
+
|
66 |
+
|
67 |
+
def synthesize_text_to_audio(text):
    """Convert *text* to speech with pyttsx3 and return the audio data.

    The engine writes the audio to a temporary file, which is read back into
    memory and then deleted, so no file handle or temp file outlives the call.

    Args:
        text: The text to be spoken.

    Returns:
        A binary, file-like ``io.BytesIO`` positioned at the start of the
        audio data (suitable for passing to ``st.audio``).
    """
    import contextlib
    import io
    import os

    engine = pyttsx3.init()

    # Only reserve a unique path here; the handle is closed on leaving the
    # `with` so the engine can write to the path afterwards.
    # NOTE(review): the suffix is ".mp3", but the actual audio container
    # written depends on the pyttsx3 driver in use - confirm.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file_path = temp_file.name

    try:
        # save_to_file only queues the request; runAndWait performs it.
        engine.save_to_file(text, temp_file_path)
        engine.runAndWait()

        with open(temp_file_path, "rb") as sound_file:
            audio_bytes = sound_file.read()
    finally:
        # Best-effort cleanup: a failure to delete must not mask the result
        # or the original error.
        with contextlib.suppress(OSError):
            os.remove(temp_file_path)

    return io.BytesIO(audio_bytes)
|
76 |
+
|
77 |
+
|
78 |
+
|
79 |
+
# Stays None unless there is extracted text to summarize.
summarized_text = None

if full_text:
    # Instruction block first, document text appended directly after it.
    model_input = DETAILED_SUMMARIZATION_PROMPT + full_text
    generation_options = {
        "max_new_tokens": 4096,
        "temperature": 0.2,
        "top_p": 0.8,
    }
    with st.spinner("Summarizing Text Content..."):
        summarized_text = client.text_generation(
            model_input, **generation_options
        )
        # Echo the generated summary to stdout.
        print(summarized_text)

if summarized_text:
    with st.spinner('Synthesizing to Audio...'):
        audio_stream = synthesize_text_to_audio(summarized_text)
        st.audio(audio_stream)
|
93 |
+
|
hella swag paper official.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a042fdcc4a22848cffde37e9c5257e423443980172bab7367dffb4e779aac37
|
3 |
+
size 1192712
|
sklistener-icon.jpg
ADDED