Spaces:
Sleeping
Sleeping
Fix bug
Browse files
Fix the bug that occurs when the PDF doesn't have a title or author.
app.py
CHANGED
@@ -20,14 +20,20 @@ def summarize_pdf(pdf_file):
|
|
20 |
if pdf_file is not None:
|
21 |
with st.spinner('Generando resumen, espera un poco...'):
|
22 |
reader = PyPDF2.PdfReader(pdf_file)
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
pages =reader.pages
|
26 |
text = [pages[i].extract_text() for i in range(len(pages))]
|
27 |
text = [utils.drop_non_relevant_text(utils.preprocess_text(x)) for x in text]
|
28 |
text = [' '.join(x) for x in text]
|
29 |
text=[x+'\n' if len(x) < 50 else generate_summary(x)+' \n' for x in text]
|
30 |
-
results = [
|
31 |
st.session_state["summary"] = ' '.join(results)
|
32 |
|
33 |
## Graphic interfaz
|
|
|
20 |
if pdf_file is not None:
|
21 |
with st.spinner('Generando resumen, espera un poco...'):
|
22 |
reader = PyPDF2.PdfReader(pdf_file)
|
23 |
+
if reader.metadata.title == None
|
24 |
+
title = ''
|
25 |
+
else:
|
26 |
+
title = reader.metadata.title
|
27 |
+
if reader.metadata.author == None
|
28 |
+
author = ''
|
29 |
+
else:
|
30 |
+
author = reader.metadata.author
|
31 |
pages =reader.pages
|
32 |
text = [pages[i].extract_text() for i in range(len(pages))]
|
33 |
text = [utils.drop_non_relevant_text(utils.preprocess_text(x)) for x in text]
|
34 |
text = [' '.join(x) for x in text]
|
35 |
text=[x+'\n' if len(x) < 50 else generate_summary(x)+' \n' for x in text]
|
36 |
+
results = [title+' \n', author+' \n'] + text
|
37 |
st.session_state["summary"] = ' '.join(results)
|
38 |
|
39 |
## Graphic interfaz
|