# Spaces:
# Runtime error
# Runtime error
import io
import json

import gradio as gr
import requests
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
# Checkpoint used for extractive question answering. `from_pretrained`
# fetches the files on first use and reuses the local cache afterwards.
model_name = "distilbert-base-cased-distilled-squad"

# Both objects are created once at import time and shared by every request.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# DistilBERT has 512 position embeddings; a longer input fails inside the
# model, so the PDF text is truncated to fit alongside the question.
_MAX_SEQUENCE_TOKENS = 512

# Longest answer span (in tokens) considered after the predicted start.
_MAX_ANSWER_TOKENS = 64

# Seconds to wait for the remote PDF-to-text service before giving up.
_EXTRACTION_TIMEOUT_S = 60


def _read_pdf_bytes(pdf_file):
    """Return the raw bytes of an upload given as a path or a file-like object."""
    if isinstance(pdf_file, str):
        with open(pdf_file, "rb") as handle:
            return handle.read()
    return pdf_file.read()


def _extract_pdf_text(pdf_data):
    """Send PDF bytes to the remote extraction service and return its text."""
    # NOTE(review): this uploads the user's document to a third-party
    # service; confirm that is acceptable for the data being handled.
    response = requests.post(
        'https://pdftotext.com/ExtractText',
        files={'pdffile': io.BytesIO(pdf_data)},
        data={'form': 'pdftotext'},
        timeout=_EXTRACTION_TIMEOUT_S,
    )
    # Fail loudly instead of feeding an HTTP error page to the model.
    response.raise_for_status()
    return response.text.strip()


def answer_question(pdf_file, question):
    """Answer ``question`` using the text extracted from an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF, either a filesystem path (``str``) or a
            binary file-like object exposing ``read()``. ``None`` means
            nothing was uploaded.
        question: The question to ask about the document.

    Returns:
        The answer span predicted by the model as a string, or a short
        explanatory message when the upload, the question, or the extracted
        text is missing.

    Raises:
        requests.RequestException: If the extraction service cannot be
            reached, times out, or responds with an HTTP error status.

    Note:
        Only the part of the document that fits in the model's input window
        together with the question is considered; the rest is truncated.
    """
    if pdf_file is None:
        return "Please upload a PDF document."
    question = (question or "").strip()
    if not question:
        return "Please enter a question."

    text = _extract_pdf_text(_read_pdf_bytes(pdf_file))
    if not text:
        return "No text could be extracted from the PDF."

    # Truncate only the document ("second" sequence), never the question.
    input_ids = tokenizer.encode(
        question,
        text,
        truncation="only_second",
        max_length=_MAX_SEQUENCE_TOKENS,
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = model(torch.tensor([input_ids]), return_dict=True)

    start_logits = prediction.start_logits[0]
    end_logits = prediction.end_logits[0]
    answer_start = start_logits.argmax().item()
    # Search for the end at or after the start so the span can never be
    # inverted (which previously produced an empty answer).
    end_window = end_logits[answer_start:answer_start + _MAX_ANSWER_TOKENS]
    answer_end = answer_start + end_window.argmax().item()

    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[answer_start:answer_end + 1],
        skip_special_tokens=True,  # keep [CLS]/[SEP] out of the answer
    )
    return tokenizer.convert_tokens_to_string(answer_tokens)
# Components come from the top-level gradio namespace: the legacy
# `gr.inputs` / `gr.outputs` modules are deprecated and no longer exist in
# current Gradio releases.
inputs = [
    gr.File(label="PDF document"),
    gr.Textbox(label="Question"),
]
outputs = gr.Textbox(label="Answer")

# Build the app and start serving as soon as the script is executed.
demo = gr.Interface(
    fn=answer_question,
    inputs=inputs,
    outputs=outputs,
    title="PDF Question Answering Tool",
    description="Upload a PDF document and ask a question. The app will use a pre-trained model to find the answer.",
)
demo.launch()