Spaces:

mlnotes
/

borrador_constitucion_chile

App Files Files Community

borrador_constitucion_chile / app.py

palegre

Add application file beta.

b19c8bc over 2 years ago

3.02 kB


	# %%
	import os
	from time import sleep

	from haystack.document_stores import ElasticsearchDocumentStore
	from haystack.utils import launch_es

	# launch_es() comes from haystack.utils; presumably it starts a local
	# Elasticsearch instance for the document store below — confirm.
	launch_es()
	# Fixed 30-second pause before anything connects.
	# NOTE(review): looks like this is meant to give Elasticsearch time to boot;
	# there is no readiness check, so a slower start would still fail — confirm.
	sleep(30)
	# %%
	# Sets the Haystack telemetry environment variable to "False".
	# NOTE(review): this runs after the haystack imports above have executed —
	# confirm Haystack reads the variable lazily rather than at import time.
	os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"
	# Document store on localhost with empty credentials, using the "document" index.
	document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
	# %%
	import pandas as pd

	df_document = pd.read_csv("data/articles.csv")
	df_document.head()  # result is discarded when this runs as a script
	# %%
	# Columns copied verbatim into each document's "meta" mapping.
	_meta_columns = ("chapter_name", "article_page", "article_number", "article_name")

	# One document per CSV row: the row label becomes the id and the "article"
	# column becomes the searchable content.
	articles = [
	    {
	        "id": row_label,
	        "content": record["article"],
	        "meta": {column: record[column] for column in _meta_columns},
	    }
	    for row_label, record in df_document.iterrows()
	]

	document_store.write_documents(articles, index="document")
	print(f"Loaded {document_store.get_document_count()} documents")
	# %%
	from haystack.nodes import BM25Retriever

	# BM25 retriever backed by the document store created earlier in this script.
	retriever = BM25Retriever(document_store=document_store)
	# %%
	from haystack.nodes import FARMReader

	# Model identifier handed to the reader as model_name_or_path.
	model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
	reader = FARMReader(
	    model_name_or_path=model_ckpt,
	    # Windowing of long passages.
	    max_seq_len=384,
	    doc_stride=128,
	    # Answer / runtime behaviour.
	    return_no_answer=False,
	    use_gpu=False,
	    progress_bar=False,
	)
	# %%
	from haystack.pipelines import ExtractiveQAPipeline

	# Retriever narrows the candidates, reader extracts answers from them.
	pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
	# %%
	from textwrap import fill


	def run_qa_pipeline(question):
	    """Run the module-level QA pipeline for ``question``.

	    The "Retriever" node is called with ``top_k=10`` and the "Reader" node
	    with ``top_k=5``. Whatever ``pipe.run`` returns is passed back unchanged.
	    """
	    node_params = {
	        "Retriever": {"top_k": 10},
	        "Reader": {"top_k": 5},
	    }
	    return pipe.run(query=question, params=node_params)

	def results_as_markdown(results):
	    """Render the answers of a QA pipeline run as one block of text.

	    For every entry in ``results["answers"]`` the source article is looked up
	    in the module-level ``document_store`` by the answer's ``document_id``.
	    Each entry becomes a header line (chapter, article number, article name
	    and page, read from the answer's ``meta``) followed by the full article
	    content wrapped at 80 columns.

	    Args:
	        results: Mapping with an ``"answers"`` key holding answer objects that
	            expose ``document_id`` and ``meta``.

	    Returns:
	        The formatted entries joined by blank lines, or ``""`` when there is
	        nothing to show.
	    """
	    # Spelled with explicit escapes so the emitted text does not depend on how
	    # this source file happens to be indented.
	    template = "Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.\n{}\n"

	    top_answers = []
	    for result in results["answers"]:
	        article = document_store.get_document_by_id(result.document_id)
	        if article is None:
	            # The lookup found nothing for this id; skip the entry instead of
	            # failing on ``article.content`` and losing the other answers.
	            continue
	        meta = result.meta
	        top_answers.append(
	            template.format(
	                meta["chapter_name"],
	                meta["article_number"],
	                meta["article_name"],
	                meta["article_page"],
	                fill(article.content, 80),
	            )
	        )

	    return "\n\n".join(top_answers)

	def query_qa_pipeline(question):
	    """Answer ``question`` and return the result as display-ready text.

	    Passes the question to ``run_qa_pipeline`` and formats its output with
	    ``results_as_markdown``.

	    Args:
	        question: The user's query text.

	    Returns:
	        The formatted answers, or ``""`` when ``question`` is missing or
	        contains only whitespace (the pipeline is not run in that case).
	    """
	    # Nothing to search for: skip the retriever/reader round trip entirely.
	    if question is None or not question.strip():
	        return ""
	    return results_as_markdown(run_qa_pipeline(question))

	# %%
	import gradio as gr

	title = "CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN"
	default_question = "educación gratuita"

	# Page layout: heading, then a column holding the question box, the search
	# button and the answer area, each in its own row.
	# NOTE(review): the nesting of the rows inside the column was reconstructed
	# from a listing that had lost its indentation — confirm against the app.
	with gr.Blocks() as demo:
	    gr.Markdown(title)
	    with gr.Column():
	        with gr.Row():
	            question = gr.Textbox(
	                lines=2,
	                max_lines=3,
	                label="Pregunta:",
	                placeholder=default_question,
	            )
	        with gr.Row():
	            btn = gr.Button("Buscar")
	        with gr.Row():
	            answers = gr.Markdown()
	    # Clicking the button sends the question text through the QA pipeline and
	    # shows the returned text in the answer area.
	    btn.click(fn=query_qa_pipeline, inputs=question, outputs=answers)

	demo.launch(share=True)

	# %%