|
|
|
|
|
import os
from time import sleep

# Opt out of Haystack telemetry before any Haystack module is imported or
# called, so the setting is already in place when Haystack code first runs.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"

from haystack.document_stores import ElasticsearchDocumentStore  # noqa: E402
from haystack.utils import launch_es  # noqa: E402

# Seconds to pause after launch_es() before connecting to Elasticsearch.
ES_STARTUP_WAIT_SECONDS = 30

launch_es()
# Fixed pause before the first connection attempt.
# NOTE(review): presumably launch_es() returns before the server accepts
# connections; a readiness poll would be more robust than a fixed sleep.
sleep(ES_STARTUP_WAIT_SECONDS)

# Document store backed by the local Elasticsearch instance, using the
# "document" index with empty credentials.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="document",
)
|
|
|
import pandas as pd |
|
|
|
# Load the article table from disk.
df_document = pd.read_csv("data/articles.csv")
# Bare expression: its value is discarded when this runs as a script.
df_document.head()

# Convert every CSV row into the dict layout handed to the document store:
# the row index as "id", the article text as "content", and the remaining
# descriptive columns grouped under "meta".
articles = [
    {
        "id": row_index,
        "content": record["article"],
        "meta": {
            column: record[column]
            for column in (
                "chapter_name",
                "article_page",
                "article_number",
                "article_name",
            )
        },
    }
    for row_index, record in df_document.iterrows()
]

# Index the articles, then report how many documents the store holds.
document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
|
|
|
from haystack.nodes import BM25Retriever, FARMReader
from haystack.pipelines import ExtractiveQAPipeline

# Retriever that queries the module-level document store.
retriever = BM25Retriever(document_store=document_store)

# Checkpoint identifier handed to the reader.
# NOTE(review): the name suggests a Spanish SQuAD2-style fine-tune - confirm.
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Reader configured to run without GPU, without a progress bar and without
# "no answer" predictions.
reader = FARMReader(
    model_ckpt,
    use_gpu=False,
    progress_bar=False,
    return_no_answer=False,
    max_seq_len=384,
    doc_stride=128,
)

# Extractive QA pipeline combining the reader and retriever built above.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
|
|
|
from textwrap import fill |
|
|
|
|
|
def run_qa_pipeline(question, *, retriever_top_k=10, reader_top_k=5):
    """Run the module-level QA pipeline for *question* and return its output.

    Args:
        question: Query text forwarded to ``pipe.run`` as ``query``.
        retriever_top_k: ``top_k`` value passed to the "Retriever" node.
        reader_top_k: ``top_k`` value passed to the "Reader" node.

    Returns:
        The object returned by ``pipe.run``; ``results_as_markdown`` reads
        its ``"answers"`` entry.
    """
    node_params = {
        "Retriever": {"top_k": retriever_top_k},
        "Reader": {"top_k": reader_top_k},
    }
    return pipe.run(query=question, params=node_params)
|
|
|
def results_as_markdown(results, store=None):
    """Render the articles behind the pipeline answers as a Markdown string.

    For each answer in ``results["answers"]`` the source article is looked up
    by the answer's ``document_id`` and rendered as a bold header line built
    from the answer's metadata, followed by the article text wrapped at 80
    columns.

    Args:
        results: Mapping with an ``"answers"`` entry; each answer exposes
            ``document_id`` and ``meta`` (with the keys ``chapter_name``,
            ``article_number``, ``article_name`` and ``article_page``).
        store: Object providing ``get_document_by_id``. Defaults to the
            module-level ``document_store``.

    Returns:
        The rendered blocks joined by blank lines, or ``""`` when there is
        nothing to show.
    """
    if store is None:
        store = document_store

    template = (
        "**Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.**\n"
        "{}\n"
    )

    rendered = []
    seen_ids = set()
    for answer in results["answers"]:
        doc_id = answer.document_id
        # Several answers can point at the same article; since only the
        # article itself is displayed, repeating it adds nothing.
        if doc_id is None or doc_id in seen_ids:
            continue
        seen_ids.add(doc_id)

        article = store.get_document_by_id(doc_id)
        if article is None:
            # The lookup found nothing; skip instead of failing on .content.
            continue

        meta = answer.meta
        rendered.append(
            template.format(
                meta["chapter_name"],
                meta["article_number"],
                meta["article_name"],
                meta["article_page"],
                fill(article.content, 80),
            )
        )

    return "\n\n".join(rendered)
|
|
|
def query_qa_pipeline(question):
    """Answer *question* and return the result formatted as Markdown.

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        The Markdown produced by ``results_as_markdown`` for the pipeline
        output, or ``""`` when the question is blank.
    """
    cleaned = (question or "").strip()
    if not cleaned:
        # Nothing to search for: skip the retriever/reader round trip.
        return ""
    return results_as_markdown(run_qa_pipeline(cleaned))
|
|
|
|
|
import gradio as gr |
|
|
|
# Heading shown at the top of the page (Markdown bold).
title = "**CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN**"
# Example query displayed as the placeholder of the question box.
default_question = "educación gratuita"

# Page layout: heading, then a column holding the question box, the search
# button and the Markdown area that receives the formatted answers.
with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Column():
        with gr.Row():
            question = gr.Textbox(
                label="Pregunta:",
                placeholder=default_question,
                lines=2,
                max_lines=3,
            )
        with gr.Row():
            btn = gr.Button("Buscar")
        with gr.Row():
            answers = gr.Markdown()
    # Clicking the button sends the question text through the QA pipeline
    # and writes the returned Markdown into the answers area.
    btn.click(query_qa_pipeline, question, answers)

# Start the app with share=True.
demo.launch(share=True)
|
|
|
|
|
|