# File size: 2,368 Bytes
# 0d7799a
from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import os
# --- Application setup -------------------------------------------------------
app = Flask(__name__)
# Allow the secret key to be supplied through the environment so it does not
# have to live in source control; the original literal stays as the fallback
# so existing development setups behave exactly as before.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "supersecretkey")

# Directory (relative to the working directory) where uploaded PDFs are stored.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the same directory.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Load the pre-trained BART tokenizer and model once at import time so every
# request handler can reuse them.
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: The PDF source; it is handed unchanged to ``PdfReader``.

    Returns:
        A single string made of each page's extracted text with no separator
        between pages. Pages that yield no text contribute nothing.
    """
    reader = PdfReader(pdf_path)
    # `or ""` keeps the join safe if a page's extraction yields a falsy value
    # (e.g. an image-only page) instead of raising TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page
@app.route('/upload', methods=['POST'])
def upload_file():
    """Store an uploaded PDF and render the page with a summary of its text.

    Reads the multipart form field named ``file``. On any validation failure
    a message is flashed and the client is redirected to ``index``. On
    success the PDF is saved under ``app.config['UPLOAD_FOLDER']``, its text
    is extracted and summarised with the module-level ``tokenizer``/``model``,
    and ``index.html`` is rendered with a ``summary`` variable.
    """
    if 'file' not in request.files:
        flash("No File Path")
        return redirect(url_for('index'))

    file = request.files['file']
    # Covers both an empty string and a missing (None) filename.
    if not file.filename:
        flash("Not Selected File")
        return redirect(url_for('index'))

    # The filename is client-controlled: keep only its last path component so
    # a name such as "../../x.pdf" cannot escape the upload folder. Backslashes
    # are normalised first so Windows-style paths are stripped on any OS.
    # NOTE(review): a stricter sanitiser may be preferable for production use.
    filename = os.path.basename(file.filename.replace('\\', '/'))
    if not filename.lower().endswith('.pdf'):
        flash("Only PDF file are alllowed")
        return redirect(url_for('index'))

    pdf_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(pdf_path)
    flash("File Successfully Uploaded")

    # Read from the saved copy: the upload stream has already been consumed
    # by save(), so it must not be passed to the PDF reader.
    text = extract_text_from_pdf(pdf_path)
    if not text.strip():
        # Nothing to summarise (e.g. a scanned, image-only PDF).
        flash("No extractable text found in the PDF")
        return redirect(url_for('index'))

    # Inputs longer than 1024 tokens are truncated before generation.
    inputs = tokenizer(text, max_length=1024, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"], num_beams=4, max_length=300, early_stopping=True
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return render_template('index.html', summary=summary)
@app.route('/summarize', methods=['POST'])
def summarize_text():
    """Summarise the ``text`` field of a JSON request body.

    Returns:
        ``{"summary": <str>}`` with status 200 when ``text`` is a non-blank
        string; ``{"summary": ""}`` with status 400 when the body is not a
        JSON object or ``text`` is missing, blank, or not a string.
    """
    # silent=True yields None instead of aborting on a missing/invalid JSON
    # body, so every bad request gets the same 400 payload below.
    payload = request.get_json(silent=True)
    # A JSON body may legally be a list, number or null; only objects have keys.
    text = payload.get('text', '') if isinstance(payload, dict) else ''
    if not isinstance(text, str) or not text.strip():
        return jsonify({'summary': ''}), 400

    # Inputs longer than 1024 tokens are truncated before generation.
    inputs = tokenizer(text, max_length=1024, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"], num_beams=4, max_length=300, early_stopping=True
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return jsonify({'summary': summary})
# Start the development server only when this file is executed directly.
if __name__ == "__main__":
    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # it is kept to preserve behaviour but must not be used in production.
    app.run(debug=True)