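# Spaces: Sleeping
# NOTE(review): the line above appears to be hosting-page status text captured
# together with the source; it is not part of the program.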
import gradio as gr | |
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack.pipelines import ExtractiveQAPipeline | |
# Store shared by the indexing step and the retriever below.
document_store = InMemoryDocumentStore()

# Identifier of the reader model handed to FARMReader.
model = "Saturdays/mdeberta-v3-base-squad2_refugees_dataset_finetuned"
reader = FARMReader(model_name_or_path=model)

# Cleans converted text and splits it into passages of about 100 words,
# overlapping by 3 words, without cutting sentences in half.
preprocessor = PreProcessor(
    # Cleaning options.
    clean_whitespace=True,
    clean_empty_lines=True,
    clean_header_footer=True,
    # Splitting options.
    split_by="word",
    split_length=100,
    split_overlap=3,
    split_respect_sentence_boundary=True,
)
def print_answers(results):
    """Reduce pipeline results to the answer text and score of each answer.

    Despite its name, this function prints nothing; it only builds a list.

    Args:
        results: Mapping with an "answers" entry holding objects that expose
            ``answer`` and ``score`` attributes.

    Returns:
        A list with one dict per answer, holding the "answer" and "score"
        attributes whose value is not None.
    """
    wanted = ("answer", "score")  # "context" is not included
    summaries = []
    for item in results["answers"]:
        summary = {}
        for name in wanted:
            value = getattr(item, name)
            # Attributes set to None are left out of the summary.
            if value is not None:
                summary[name] = value
        summaries.append(summary)
    return summaries
def pdf_to_document_store(pdf_file):
    """Replace the contents of the shared document store with one PDF.

    The PDF is converted and split into passages before the store is
    touched, so a missing or unreadable file raises without first wiping
    the documents that are already indexed.

    Args:
        pdf_file: Path of the PDF file to index.

    Returns:
        None.
    """
    converter = PDFToTextConverter(
        remove_numeric_tables=True, valid_languages=["es"])
    # Only the first converted document is kept.
    documents = [converter.convert(file_path=pdf_file, meta=None)[0]]
    preprocessed_docs = preprocessor.process(documents)

    # Swap the store contents only once the new passages are ready.
    document_store.delete_documents()
    document_store.write_documents(preprocessed_docs)
# Built on first use by _get_pipeline() and reused for later questions.
_qa_pipeline = None


def _get_pipeline():
    """Index data.pdf and build the QA pipeline once, then return it."""
    global _qa_pipeline
    if _qa_pipeline is None:
        pdf_to_document_store("data.pdf")
        # The retriever is created after indexing, as in the original order.
        retriever = TfidfRetriever(document_store=document_store)
        _qa_pipeline = ExtractiveQAPipeline(reader, retriever)
    return _qa_pipeline


def predict(question):
    """Answer a question from the contents of data.pdf.

    The PDF is indexed and the pipeline built on the first non-blank
    question only; later calls reuse them instead of re-parsing the PDF and
    rewriting the shared document store on every request. Changes to
    data.pdf are therefore picked up only after a restart.

    Args:
        question: The user's question as text.

    Returns:
        A list of dicts with "answer" and "score" entries (at most 3, taken
        from the 5 retrieved passages), or an empty list when the question
        is empty or only whitespace.
    """
    # A blank query has nothing to retrieve; skip the model run entirely.
    if not question or not question.strip():
        return []

    result = _get_pipeline().run(
        query=question,
        params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 3}},
    )
    return print_answers(result)
# Texts shown at the top of the web page.
title = "Chatbot Refugiados"
description = "Our chatbot helps refugees arriving in Spain by providing information on key topics."

# One three-line text box in, plain text out; predict() handles each question.
iface = gr.Interface(
    fn=predict,
    inputs=[gr.inputs.Textbox(label='Haz una pregunta', lines=3)],
    outputs="text",
    title=title,
    description=description,
    examples=['Dónde pedir ayuda?', 'qué hacer al llegar a España?'],
    theme="huggingface",
)

# Start the web app when the script is run.
iface.launch()