Kabil007 committed on
Commit
0d7799a
·
verified ·
1 Parent(s): b25454b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
2
+ from PyPDF2 import PdfReader
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
+ import os
6
+
7
app = Flask(__name__)
# flash() stores its messages in the signed session cookie, so a secret key is
# required. Prefer a value supplied through the environment; the literal
# fallback only keeps local development working and should not be relied on
# in a deployment.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "supersecretkey")

UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok=True replaces the separate existence check, which could race when
# two processes start at the same time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Load the pre-trained BART tokenizer and model once at import time so that
# every request handler reuses the same instances.
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
19
+
20
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: The PDF source handed straight to ``PdfReader``.

    Returns:
        A single string made of each page's extracted text with no separator
        inserted between pages. Pages that yield no text contribute nothing.
    """
    reader = PdfReader(pdf_path)
    # Treat a falsy extraction result (e.g. None for a page without a text
    # layer) as empty text so one such page cannot abort the whole document.
    return "".join(page.extract_text() or "" for page in reader.pages)
26
+
27
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_template = 'index.html'
    return render_template(landing_template)
30
+
31
@app.route('/upload', methods=['POST'])
def upload_file():
    """Store an uploaded PDF and render the page with a summary of its text.

    Expects a multipart form field named ``file``. The upload is written into
    the configured upload folder, its text is extracted from the saved copy,
    and the summary is passed to ``index.html`` as ``summary``.

    Returns:
        The rendered ``index.html`` on success, or a redirect to the index
        page (with a flashed message) when the field is missing, no file was
        selected, the file is not a PDF, or no text could be extracted.
    """
    if 'file' not in request.files:
        flash("No File Path")
        return redirect(url_for('index'))

    uploaded = request.files['file']
    if uploaded.filename == '':
        flash("Not Selected File")
        return redirect(url_for('index'))

    # The filename is chosen by the client: keep only its final path
    # component (treating backslashes as separators too) so a name such as
    # "../../app.py.pdf" cannot escape the upload folder.
    filename = os.path.basename(uploaded.filename.replace('\\', '/'))

    # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
    if not filename.lower().endswith('.pdf'):
        flash("Only PDF file are alllowed")
        return redirect(url_for('index'))

    # Save once, to a real path, and read the text back from that path; the
    # upload stream itself is consumed by save() and is not a valid target.
    saved_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    uploaded.save(saved_path)
    flash("File Successfully Uploaded")

    text = extract_text_from_pdf(saved_path)
    if not text or not text.strip():
        # Nothing to summarise (e.g. a scanned, image-only document).
        flash("No extractable text found in the PDF")
        return redirect(url_for('index'))

    # Input is truncated to 1024 tokens; the summary is produced with a
    # 4-beam search and capped at 300 tokens.
    inputs = tokenizer(text, max_length=1024, return_tensors="pt", truncation=True)
    summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=300, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return render_template('index.html', summary=summary)
59
+
60
@app.route('/summarize', methods=['POST'])
def summarize_text():
    """Summarise the ``text`` field of a JSON request body.

    Expects a JSON object such as ``{"text": "..."}``.

    Returns:
        ``{"summary": <summary>}`` on success, or ``{"summary": ""}`` with
        HTTP status 400 when the body is missing, is not a JSON object, or
        carries no non-blank string under ``text``.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request takes the same 400 path below.
    payload = request.get_json(silent=True)
    text = payload.get('text', '') if isinstance(payload, dict) else ''

    if not isinstance(text, str) or not text.strip():
        return jsonify({'summary': ''}), 400

    # Input is truncated to 1024 tokens; the summary is produced with a
    # 4-beam search and capped at 300 tokens.
    inputs = tokenizer(text, max_length=1024, return_tensors="pt", truncation=True)
    summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=300, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return jsonify({'summary': summary})
74
+
75
if __name__ == "__main__":
    # Debug mode exposes the interactive Werkzeug debugger, which can execute
    # arbitrary code, so it is enabled only on explicit opt-in
    # (e.g. FLASK_DEBUG=1) rather than unconditionally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled)