andreeabodea committed on
Commit
5fa7016
·
verified ·
1 Parent(s): 220da13

get first page text

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -86,6 +86,12 @@ def process_pdf(path):
86
  #json_string = json.dumps(results_dict, indent=4)
87
  #print(json_string)
88
 
 
 
 
 
 
 
89
  # Define the Gradio interface
90
  iface = gr.Interface(fn=process_pdf,
91
  inputs=gr.File(type="binary", label="Upload PDF"),
 
86
  #json_string = json.dumps(results_dict, indent=4)
87
  #print(json_string)
88
 
89
+
90
+ def get_first_page_text(path):
91
+ doc = pdfplumber.open(io.BytesIO(path))
92
+ if len(doc.pages):
93
+ return doc.pages[0].extract_text()
94
+
95
  # Define the Gradio interface
96
  iface = gr.Interface(fn=process_pdf,
97
  inputs=gr.File(type="binary", label="Upload PDF"),