Spaces:

vividsd
/

practice

Build error

App Files Files Community

vividsd commited on Dec 9, 2023

Commit

3075f85

1 Parent(s): 25020a5

Create app.py

Browse files

Files changed (1) hide show

app.py +40 -0

app.py ADDED Viewed

	@@ -0,0 +1,40 @@

+# imports
+import gradio as gr
+from transformers import pipeline
+import torch
+import PyPDF2
+# Function to read the uploaded PDF and extract its abstract, when present, by searching for the keyword "abstract:".
+# If the PDF doesn't contain "abstract:" (matched case-insensitively), it won't work.
+# Also, I'm trying to limit the result to the abstract itself, not other sections, by stopping at the first blank line after it.
+def process_pdf(pdf):
+    with open(pdf.name, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+        abstract_start = text.lower().find("abstract:")
+        if abstract_start != -1:
+            abstract_end = text.lower().find("\n\n", abstract_start)
+            if abstract_end != -1:
+                abstract = text[abstract_start:abstract_end]
+            else:
+                abstract = text[abstract_start:]
+        else:
+            abstract = "Abstract not found."
+        return abstract
+#Now creating the interface to read the PDFs
+interface = gr.Interface(fn=process_pdf,
+                           inputs=gr.inputs.File(type="file", label="Upload PDF"),
+                           outputs="text",
+                           title="Summarizing outloud",
+                           description="Extract abstracts from PDFs, summarize then in 1 sentence and get an audio of it",
+                           examples=[["example_pdf1.pdf"], ["example_pdf2.pdf"]])
+   if __name__ == "__main__":
+       interface.launch()