"""Gradio app that summarizes an uploaded PDF and answers a question about it."""

import os
import re
import textwrap

import fitz
import gradio as gr
from transformers import pipeline

# Maximum number of characters handed to the summarizer in a single call.
CHUNK_SIZE = 1000
# ``max_length`` / ``min_length`` arguments passed to the summarization pipeline.
SUMMARY_MAX_LENGTH = 150
SUMMARY_MIN_LENGTH = 50

# Both pipelines are created once at import time and reused for every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_model = pipeline(
    "question-answering",
    model="deepset/bert-large-uncased-whole-word-masking-squad2",
)


def extract_text_from_pdf(pdf_file):
    """Return the text of a PDF as one whitespace-normalized string.

    Args:
        pdf_file: Path to the PDF (``str`` or ``os.PathLike``), or an object
            that exposes the path as ``.name`` (for example a temporary-file
            wrapper handed over by an upload widget).

    Returns:
        The text of all pages concatenated, with every run of whitespace
        collapsed to a single space and both ends stripped.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        path = os.fspath(pdf_file)
    else:
        # Upload widgets may pass a file wrapper instead of a path; fall back
        # to the object itself when it has no ``name`` attribute.
        path = getattr(pdf_file, "name", pdf_file)

    with fitz.open(path) as pdf:
        raw_text = "".join(page.get_text("text") for page in pdf)
    return re.sub(r"\s+", " ", raw_text).strip()


def summarize(text):
    """Summarize ``text``, processing it in chunks of at most ``CHUNK_SIZE`` chars.

    The text is split at whitespace boundaries so words are not cut in half;
    each chunk is summarized independently and the partial summaries are
    joined with single spaces.

    Args:
        text: The text to summarize.

    Returns:
        The combined summary, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    # ``textwrap.wrap`` returns an empty list for empty/whitespace-only input
    # and a single chunk when the text already fits in ``CHUNK_SIZE``.
    chunks = textwrap.wrap(text or "", width=CHUNK_SIZE, break_on_hyphens=False)
    if not chunks:
        return ""

    partial_summaries = [
        summarizer(
            chunk,
            max_length=SUMMARY_MAX_LENGTH,
            min_length=SUMMARY_MIN_LENGTH,
            do_sample=False,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(partial_summaries)


def answer_question(text, question):
    """Answer ``question`` using ``text`` as the context.

    Args:
        text: Context passed to the question-answering pipeline.
        question: The question to answer.

    Returns:
        The ``answer`` field of the pipeline response, or an empty string when
        either the context or the question is blank (the model is not called
        in that case).
    """
    if not text or not text.strip() or not question or not question.strip():
        return ""
    response = qa_model(question=question, context=text)
    return response["answer"]


def summarize_and_qa(pdf_file, question):
    """Summarize an uploaded PDF and answer a question about its content.

    Args:
        pdf_file: The uploaded PDF, in any form accepted by
            ``extract_text_from_pdf``; ``None`` when nothing was uploaded.
        question: The question to answer; may be blank.

    Returns:
        A ``(summary, answer)`` pair of strings. When no file was uploaded or
        no text could be extracted, the first element carries a short
        explanatory message and the second is empty.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", ""

    text = extract_text_from_pdf(pdf_file)
    if not text:
        return "No extractable text was found in this PDF.", ""

    return summarize(text), answer_question(text, question)


demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox"],
    title="Understand your PDF Better",
    description="Upload a PDF to get a summary. You can ask any question regarding the content of the PDF.",
)

# NOTE(review): share=True requests a public share link; confirm that is
# intended wherever this script is deployed.
demo.launch(debug=True, share=True)