File size: 3,541 Bytes
3a6ebd0
 
 
 
8cc74da
7f509d1
3a6ebd0
 
a26aa60
 
3a6ebd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebfc928
 
 
 
 
 
3a6ebd0
2331097
3a6ebd0
ebfc928
 
 
 
 
 
 
 
 
 
 
 
 
 
2331097
 
 
 
 
ebfc928
 
2331097
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gradio as gr
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import ExtractiveQAPipeline
#from torch import 
from sentence_transformers import CrossEncoder, SentenceTransformer

# In-memory store holding the preprocessed PDF passages used for retrieval.
document_store = InMemoryDocumentStore()

# Extractive-QA reader model.
# BUG FIX: the previous value, "openai/clip-vit-base-patch32", is a CLIP
# vision-language encoder, not a span-extraction QA model — FARMReader cannot
# run extractive question answering with it. Restore the Spanish
# SQuAD2-finetuned model this app was built around (it was left commented
# out directly above the broken assignment).
model = "Saturdays/mdeberta-v3-base-squad2_refugees_dataset_finetuned"
reader = FARMReader(model_name_or_path=model)

# Split the converted PDF into ~100-word passages (respecting sentence
# boundaries, with a 3-word overlap) and clean whitespace/headers/footers.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=100,
    split_respect_sentence_boundary=True,
    split_overlap=3
)


def print_answers(results):
    """Extract the non-null fields of interest from each pipeline answer.

    Args:
        results: pipeline output dict whose "answers" entry is a list of
            objects exposing ``answer`` and ``score`` attributes.

    Returns:
        A list with one dict per answer, keeping only the requested
        fields whose value is not ``None``.
    """
    wanted = ("answer", "score")  # "context" can be re-added here if needed
    return [
        {
            name: getattr(entry, name)
            for name in wanted
            if getattr(entry, name) is not None
        }
        for entry in results["answers"]
    ]


def pdf_to_document_store(pdf_file):
    """Replace the global store's contents with passages from a PDF.

    Clears the module-level ``document_store``, converts ``pdf_file`` to
    text (Spanish, dropping numeric tables), splits it with the
    module-level ``preprocessor``, and writes the passages to the store.
    """
    document_store.delete_documents()
    pdf_converter = PDFToTextConverter(
        remove_numeric_tables=True,
        valid_languages=["es"],
    )
    raw_document = pdf_converter.convert(file_path=pdf_file, meta=None)[0]
    passages = preprocessor.process([raw_document])
    document_store.write_documents(passages)
    return None


def predict(question):
    """Answer ``question`` against data.pdf with an extractive QA pipeline.

    Re-indexes "data.pdf" on every call, retrieves the top 5 passages via
    TF-IDF, extracts the top 3 answer spans with the reader, and returns
    them as a list of {"answer", "score"} dicts.
    """
    pdf_to_document_store("data.pdf")
    qa_pipeline = ExtractiveQAPipeline(
        reader, TfidfRetriever(document_store=document_store)
    )
    run_params = {"Retriever": {"top_k": 5}, "Reader": {"top_k": 3}}
    output = qa_pipeline.run(query=question, params=run_params)
    return print_answers(output)

def respond(message, chat_history):
    """Gradio callback: answer ``message`` and append the turn to history.

    Args:
        message: the user's question; an empty string falls back to a
            default question so the demo always produces an answer.
        chat_history: list of (user, bot) tuples managed by ``gr.Chatbot``.

    Returns:
        ``("", chat_history)`` — clears the textbox and returns the
        updated history, as the Gradio outputs ``[msg, chatbot]`` expect.
    """
    if not message:  # idiomatic emptiness check (was len(message) == 0)
        message = "¿Dónde puedo solicitar asilo?"
    answers = predict(message)
    # Guard against an empty answer list: the original `predict(...)[0]`
    # raised IndexError when the pipeline returned no answers.
    if answers:
        bot_message = answers[0]['answer']
    else:
        bot_message = "Lo siento, no he encontrado una respuesta."
    chat_history.append((message, bot_message))
    return "", chat_history

# Markdown shown under the UI: project summary with links to the write-up
# article and the training repository.
description= "Our chatbot helps refugees arriving in Spain by providing information on key topics. \n This project is based on the article titled [Desarrollando un chatbot para refugiados: nuestra experiencia en Saturdays.AI](https://medium.com/saturdays-ai/desarrollando-un-chatbot-para-refugiados-nuestra-experiencia-en-saturdays-ai-9bf2551432c9), which outlines the process of building a chatbot for refugees. \n You can find the training script in this [github repo](https://github.com/jsr90/chatbot_refugiados_train)."

# UI layout: a chat panel (left, 2/3 width) next to an input column with a
# question textbox, Submit/Clear buttons, and an illustration image.
with gr.Blocks(theme="huggingface") as demo:
    gr.HTML("<h1 style='text-align: center; font-size: xx-large'>Chatbot Refugiados (spanish)</h1>")
    gr.HTML("<h2 style='text-align: center; font-size: large'>The demo you're about to see is from a project currently in development.</h2>")
    
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot()
        with gr.Column(scale=1):
            with gr.Row():
                # Textbox is pre-filled with the same default question that
                # `respond` falls back to on empty input.
                msg = gr.Textbox(label="Write your question:", value="¿Dónde puedo solicitar asilo?")
            with gr.Row():
                submit = gr.Button("Submit")
                clear = gr.Button("Clear")
            gr.Image("OIG.jpeg")

    # Pressing Enter in the textbox and clicking Submit share one handler;
    # both clear the textbox and refresh the chat history.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    # Clear resets the chatbot component to empty (None) without queueing.
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(description)

# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()