Spaces:

mlnotes
/

borrador_constitucion_chile

Runtime error

borrador_constitucion_chile / qa_pipeline_faiss.py

palegre

Add application file beta.

b19c8bc about 3 years ago

1.88 kB

	# %%
	from haystack.document_stores import FAISSDocumentStore


	# FAISS-backed document store created with the "Flat" index factory string.
	document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")
	# %%
	import pandas as pd


	def _row_to_document(row):
	    """Build the document dict for one row of the articles table.

	    Args:
	        row: A mapping-like row (e.g. a pandas Series) providing the columns
	            ``article``, ``chapter_name``, ``article_page``,
	            ``article_number`` and ``article_name``.

	    Returns:
	        A dict with the article text under ``"content"`` and the remaining
	        columns under ``"meta"``.
	    """
	    meta = {
	        "chapter_name": row["chapter_name"],
	        "article_page": row["article_page"],
	        "article_number": row["article_number"],
	        "article_name": row["article_name"],
	    }
	    # Haystack's DPR retriever takes the passage title from meta["name"] when
	    # title embedding is enabled, so expose the article name under that key
	    # too. Missing names (NaN after read_csv) and empty strings are skipped.
	    title = row["article_name"]
	    if isinstance(title, str) and title:
	        meta["name"] = title
	    return {"content": row["article"], "meta": meta}


	df_document = pd.read_csv("data/articles.csv")

	# One document per CSV row; the row index is not needed.
	articles = [_row_to_document(row) for _, row in df_document.iterrows()]

	document_store.write_documents(articles, index="document")
	print(f"Loaded {document_store.get_document_count()} documents")
	# %%
	from haystack.nodes import DensePassageRetriever

	# One checkpoint is used for both the query and the passage encoder.
	# NOTE(review): the checkpoint name says "passage_encoder"; confirm that
	# reusing it for queries is intended.
	dpr_checkpoint = "sadakmed/dpr-passage_encoder-spanish"

	retriever_options = {
	    "query_embedding_model": dpr_checkpoint,
	    "passage_embedding_model": dpr_checkpoint,
	    # Sequence length limits for queries and passages.
	    "max_seq_len_query": 64,
	    "max_seq_len_passage": 384,
	    "batch_size": 16,
	    "use_gpu": False,
	    "embed_title": True,
	    "use_fast_tokenizers": True,
	}
	retriever = DensePassageRetriever(
	    document_store=document_store,
	    **retriever_options,
	)

	# Have the document store update its embeddings using this retriever.
	document_store.update_embeddings(retriever)
	# %%
	from haystack.nodes import FARMReader

	# Checkpoint loaded by the reader.
	model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

	# Reader configuration, kept separate from the model name.
	reader_options = {
	    "max_seq_len": 384,
	    "doc_stride": 128,
	    "return_no_answer": True,
	    "use_gpu": False,
	    "progress_bar": False,
	}
	reader = FARMReader(model_name_or_path=model_ckpt, **reader_options)
	# %%
	from haystack.pipelines import ExtractiveQAPipeline

	# Combine the reader and the retriever into a single QA pipeline.
	pipe = ExtractiveQAPipeline(reader, retriever)
	# %%
	question = "pueblos originarios justicia"

	# Per-node top_k values passed to the pipeline run.
	retriever_top_k = 10
	reader_top_k = 5
	run_params = {
	    "Retriever": {"top_k": retriever_top_k},
	    "Reader": {"top_k": reader_top_k},
	}
	prediction = pipe.run(query=question, params=run_params)
	# %%
	from pprint import pprint

	# Dump the full prediction object.
	pprint(prediction)

	# %%
	from haystack.utils import print_answers

	# Print the answers with the "minimum" level of detail.
	print_answers(prediction, details="minimum")
	# %%