File size: 1,504 Bytes
8c7ca03 1f2b1df a6edded 1f2b1df 8c7ca03 1f2b1df 8c7ca03 1f2b1df 8c7ca03 1f2b1df 8c7ca03 152eb59 8c7ca03 1f2b1df 8c7ca03 152eb59 1f2b1df 152eb59 8c7ca03 152eb59 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import fitz
import gradio as gr
import re
from transformers import pipeline
# Both pipelines are constructed once at module import and then shared by
# summarize() and answer_question() for every request.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
qa_model = pipeline(
    task="question-answering",
    model="deepset/bert-large-uncased-whole-word-masking-squad2",
)
def extract_text_from_pdf(pdf_file):
    """Return the text of a PDF as one whitespace-normalised string.

    Args:
        pdf_file: Path of the PDF to open (``str`` or path-like), or an
            object that exposes the path through a ``name`` attribute.

    Returns:
        The text of every page, concatenated in page order, with each run of
        whitespace collapsed to a single space and both ends stripped.
    """
    # NOTE(review): depending on the Gradio version, the "file" input may
    # deliver a temp-file wrapper instead of a path string; in that case the
    # on-disk location is its ``name``.  Real paths are passed through
    # untouched (pathlib objects also have ``name``, but it is only the last
    # path component, so they must not take the getattr branch).
    if isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__"):
        path = pdf_file
    else:
        path = getattr(pdf_file, "name", pdf_file)

    with fitz.open(path) as pdf:
        # Collect per-page text and join once instead of repeated `+=`.
        page_texts = [page.get_text("text") for page in pdf]

    return re.sub(r'\s+', ' ', "".join(page_texts)).strip()
def summarize(text, chunk_size=1000):
    """Summarise *text* with the module-level ``summarizer`` pipeline.

    Text longer than ``chunk_size`` characters is split into pieces of at
    most ``chunk_size`` characters, each piece is summarised on its own, and
    the partial summaries are joined with single spaces.

    Args:
        text: The text to summarise.
        chunk_size: Maximum number of characters sent to the model in one
            call. Must be positive. Defaults to 1000.

    Returns:
        The summary string, or ``""`` when *text* is empty or contains only
        whitespace (the model is not called in that case).
    """
    import textwrap

    if not text or not text.strip():
        # Nothing to summarise; avoid handing the model an empty input.
        return ""

    if len(text) > chunk_size:
        # Break at whitespace so a word is never cut in half between two
        # chunks (only a single word longer than chunk_size is still split).
        chunks = textwrap.wrap(text, width=chunk_size)
    else:
        chunks = [text]

    partial_summaries = [
        summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]
    # Joining avoids both the quadratic `+=` and the trailing separator.
    return " ".join(partial_summaries)
def answer_question(text, question):
    """Answer *question* from *text* using the module-level ``qa_model``.

    Args:
        text: The context the answer is extracted from.
        question: The question to ask about *text*.

    Returns:
        The ``'answer'`` field of the model response, or ``""`` when either
        the question or the context is missing or blank.
    """
    # The question box may be left blank and a PDF may contain no
    # extractable text; the transformers question-answering pipeline rejects
    # an empty question or context, so return early instead of failing the
    # whole request.
    if not text or not text.strip():
        return ""
    if not question or not question.strip():
        return ""

    return qa_model(question=question, context=text)['answer']
def summarize_and_qa(pdf_file, question):
    """Produce a summary of a PDF and an answer to a question about it.

    Args:
        pdf_file: The uploaded PDF as passed on to ``extract_text_from_pdf``,
            or ``None`` when nothing was uploaded.
        question: The question to answer from the PDF text.

    Returns:
        A ``(summary, answer)`` tuple of strings. When no file was supplied,
        the first element is a prompt to upload one and the second is ``""``.
    """
    if pdf_file is None:
        # Submitted without an upload: report it in the summary slot rather
        # than failing somewhere inside PDF parsing or the models.
        return "Please upload a PDF file.", ""

    text = extract_text_from_pdf(pdf_file)
    # Tuple elements are evaluated left to right: summary first, then answer.
    return summarize(text), answer_question(text, question)
# Wire summarize_and_qa into a Gradio UI with a file + text input and two
# text outputs (summary, answer), then start the app.
demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox"],
    title="Understand your PDF Better",
    description="Upload a PDF to get a summary. You can ask any question regarding the content of the PDF.",
)
demo.launch(debug=True, share=True)
|