# %%
# Extractive question answering over a CSV of articles with Haystack:
# a FAISS-backed document store plus a DPR retriever select candidate
# articles, and a FARM reader extracts answer spans from them.
from haystack.document_stores import FAISSDocumentStore

document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")

# %%
import pandas as pd

df_document = pd.read_csv("data/articles.csv")

# Turn each CSV row into a Haystack document dict: the article text is the
# content and the remaining columns travel along as metadata.
articles = []
for _, row in df_document.iterrows():
    meta = {
        "chapter_name": row["chapter_name"],
        "article_page": row["article_page"],
        "article_number": row["article_number"],
        "article_name": row["article_name"],
    }
    # The retriever below is created with embed_title=True, and Haystack's
    # DPR retriever looks the title up under meta["name"]. Without this key
    # every passage would be embedded with an empty title. Skip missing
    # (NaN) names so a non-string title never reaches the tokenizer.
    if pd.notna(row["article_name"]):
        meta["name"] = str(row["article_name"])
    articles.append({"content": row["article"], "meta": meta})

document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")

# %%
from haystack.nodes import DensePassageRetriever

# NOTE(review): both encoders point at the *passage* encoder checkpoint.
# DPR normally pairs a separate question encoder with the passage encoder;
# confirm whether a matching question-encoder checkpoint should be used.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="sadakmed/dpr-passage_encoder-spanish",
    passage_embedding_model="sadakmed/dpr-passage_encoder-spanish",
    max_seq_len_query=64,
    max_seq_len_passage=384,
    batch_size=16,
    use_gpu=False,
    embed_title=True,
    use_fast_tokenizers=True,
)
# Compute embeddings for the stored documents with the retriever above.
document_store.update_embeddings(retriever)

# %%
from haystack.nodes import FARMReader

model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
reader = FARMReader(
    model_name_or_path=model_ckpt,
    progress_bar=False,
    max_seq_len=384,
    doc_stride=128,
    return_no_answer=True,
    use_gpu=False,
)

# %%
from haystack.pipelines import ExtractiveQAPipeline

pipe = ExtractiveQAPipeline(reader, retriever)

# %%
question = "pueblos originarios justicia"
prediction = pipe.run(
    query=question,
    params={
        # The retriever hands 10 candidate documents to the reader,
        # which returns its 5 best answers.
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    },
)

# %%
from pprint import pprint

pprint(prediction)

# %%
from haystack.utils import print_answers

print_answers(prediction, details="minimum")

# %%