# Source captured from a Hugging Face Space page; the Space status shown at
# capture time was "Runtime error". File size: 1,649 bytes.
# Revision hashes listed in the page gutter: b8cce11, ce57726, 779fa7a, 3c17b68.
import gradio as gr
import re
import os
import fitz
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Checkpoint id shared by the tokenizer and the seq2seq model. Both objects are
# created once at module import and reused by every request handler call.
_QA_CHECKPOINT = "potsawee/t5-large-generation-squad-QuestionAnswer"
tokenizer = AutoTokenizer.from_pretrained(_QA_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_QA_CHECKPOINT)
def extract_text_from_pdf(pdf_file_path) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The ``page.get_text()`` output of all pages joined in page order, or
        an empty string when the document yields no pages.
    """
    # Using the document as a context manager closes it (and releases the
    # underlying file) even if extraction fails part-way through.
    with fitz.open(pdf_file_path) as doc:
        return "".join(page.get_text() for page in doc)
def generate_question_answer_pairs(pdf_file):
    """Generate question/answer pairs from the text of an uploaded PDF.

    The PDF text is split into sentences, each non-blank sentence is run
    through the module-level ``tokenizer`` / ``model`` pair, and every decoded
    output containing a ``?`` is reported as a question and its answer.

    Args:
        pdf_file: Value delivered by the file input. Either an object exposing
            the file path as ``.name`` or a plain path string is accepted;
            ``None`` means nothing was uploaded.

    Returns:
        One "Question: ... / Answer: ..." entry per generated pair, each
        followed by a blank line; an empty string when no pair was produced;
        or a prompt asking for an upload when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        return "Please upload a PDF file"

    # The upload may arrive as a temp-file wrapper (path in ``.name``) or as
    # the path string itself; fall back to the value when ``.name`` is absent.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    pdf_text = extract_text_from_pdf(pdf_path)

    entries = []
    # Zero-width split right after '.', '!' or '?' keeps the punctuation
    # attached to the sentence it terminates.
    for fragment in re.split(r'(?<=[.!?])', pdf_text):
        sentence = fragment.strip()
        if not sentence:
            # The split leaves blank pieces (e.g. after the final period);
            # running generation on them only wastes time and yields noise.
            continue

        # Truncate to the tokenizer's maximum length so one unpunctuated run
        # of text (tables, headers) cannot produce an oversized input.
        input_ids = tokenizer.encode(
            sentence, return_tensors="pt", truncation=True
        )
        outputs = model.generate(
            input_ids, max_length=100, num_return_sequences=1
        )
        question_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Everything up to the first '?' is the question; the remainder is
        # the answer. Outputs without a '?' cannot be separated and are skipped.
        question, mark, answer = question_answer.partition("?")
        if not mark:
            continue
        entries.append(f"Question: {question}?\nAnswer: {answer.strip()}\n\n")

    return "".join(entries)
# Gradio UI wiring: a single PDF upload goes in, the generated
# question/answer text comes out in a textbox.
title = "Question-Answer Pairs Generation"
input_file = gr.File(label="Upload a PDF file")
output_text = gr.Textbox()
interface = gr.Interface(
    fn=generate_question_answer_pairs,
    inputs=input_file,
    outputs=output_text,
    title=title,
)
# Start serving the app.
interface.launch()