Can Günen committed on
Commit
f1cc4d1
·
1 Parent(s): a84266b

added app file

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon May 8 00:32:30 2023
4
+
5
+ @author: ahmet
6
+ """
7
+ import pdfplumber
8
+ import gradio as gr
9
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
+
11
+
12
_SUMMARIZER_NAME = 'Einmalumdiewelt/T5-Base_GNAD'
_summarizer_cache = {}


def _load_summarizer():
    """Return the (tokenizer, model) pair, loading it on first use only."""
    if 'pair' not in _summarizer_cache:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZER_NAME)
        model = AutoModelForSeq2SeqLM.from_pretrained(
            _SUMMARIZER_NAME, return_dict=True
        )
        _summarizer_cache['pair'] = (tokenizer, model)
    return _summarizer_cache['pair']


def respond(pdf_file):
    """Summarize the opening pages of a PDF.

    Text is extracted page by page until the page index passes 20% of the
    page count; the collected text is truncated to 512 tokens and
    summarized into 70-80 generated tokens.

    Args:
        pdf_file: Object whose ``name`` attribute is the path of the PDF to
            read, or ``None`` when nothing was uploaded.

    Returns:
        The generated summary as a plain string, or an empty string when
        ``pdf_file`` is ``None``.
    """
    # Nothing uploaded yet: return an empty result instead of failing on
    # the attribute access below.
    if pdf_file is None:
        return ''

    page_texts = []
    with pdfplumber.open(pdf_file.name) as pdf:
        total_pages = len(pdf.pages)
        for idx, pdf_page in enumerate(pdf.pages):
            # A page without extractable text (e.g. a scanned image) may
            # yield None/empty; treat it as empty text.
            page_texts.append(pdf_page.extract_text() or '')
            print(idx / total_pages)
            # Only the first ~20% of the document is summarized.
            if idx / total_pages > 0.2:
                break

    # Each page is preceded by a newline, matching the original layout.
    all_text = ''.join('\n' + text for text in page_texts)

    tokenizer, model = _load_summarizer()
    # "summarize: " is the T5 summarization task prefix (was misspelled).
    # NOTE(review): confirm this checkpoint was fine-tuned with that prefix.
    inputs = tokenizer.encode(
        'summarize: ' + all_text,
        return_tensors='pt',
        max_length=512,
        truncation=True,
    )
    output = model.generate(inputs, min_length=70, max_length=80)
    # Drop <pad>/</s> markers so they do not show up in the UI.
    return tokenizer.decode(output[0], skip_special_tokens=True)
29
+
30
+
31
# Page heading, passed to gr.HTML at the top of the app.
title = """<p><h1 align="center" style="font-size: 36px;">Talk with your document</h1></p>"""

# Layout: heading, then one row/column holding the file picker, the
# trigger button and the output box.
with gr.Blocks() as demo:
    gr.HTML(title)
    with gr.Row(), gr.Column():
        file_input = gr.File(label="PDF File", type="file")
        summarize = gr.Button("Summarize")
        text_output = gr.Textbox(label="Summarized text")

    # Clicking the button passes the uploaded file to respond() and writes
    # its return value into the output box.
    summarize.click(respond, inputs=[file_input], outputs=[text_output])


# Start the app with debug mode enabled.
demo.launch(debug=True)