# set path
import glob
import os
import sys

sys.path.append('../scripts')

# import helper
import scripts.process as pre
import scripts.clean as clean

# import needed libraries
import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.feature_extraction import _stop_words
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import ExtractiveQAPipeline
# NOTE: the names must be comma-separated; a `;` here ends the import statement
# and leaves `EmbeddingRetriever` as a bare (undefined) name at module load.
from haystack.nodes import FARMReader, TfidfRetriever, EmbeddingRetriever
import string
from markdown import markdown
from annotated_text import annotation
from tqdm.autonotebook import tqdm
import tempfile
import logging

logger = logging.getLogger(__name__)


# Haystack Components
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None}, allow_output_mutation=True)
def start_haystack(documents_processed):
    """Build an extractive question-answering pipeline over the given documents.

    The documents are written to a fresh ``InMemoryDocumentStore``, embedded
    with a sentence-transformers retriever, and paired with a FARM reader.
    The result is cached by Streamlit via the ``st.cache`` decorator above.

    Args:
        documents_processed: Documents passed unchanged to
            ``InMemoryDocumentStore.write_documents``.

    Returns:
        An ``ExtractiveQAPipeline`` built from the reader and the retriever.
    """
    document_store = InMemoryDocumentStore()
    document_store.write_documents(documents_processed)

    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
        model_format="sentence_transformers",
    )
    document_store.update_embeddings(retriever)
    # Keep this retriever for querying: it is the one whose model produced the
    # stored document embeddings. Re-creating it without ``embedding_model``
    # (as the code previously did) would discard that configuration.

    # Model previously noted as an alternative: deepset/roberta-base-squad2
    reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=True)

    return ExtractiveQAPipeline(reader, retriever)
def ask_question(question, pipeline, *, retriever_top_k=10, reader_top_k=5):
    """Run *question* through *pipeline* and flatten its answers into dicts.

    Args:
        question: Query string forwarded to ``pipeline.run`` as ``query``.
        pipeline: Object exposing ``run(query=..., params=...)`` whose result
            maps ``"answers"`` to an iterable of objects with ``to_dict()``.
        retriever_top_k: ``top_k`` sent to the ``"Retriever"`` node.
        reader_top_k: ``top_k`` sent to the ``"Reader"`` node.

    Returns:
        A list with one dict per answer, in pipeline order. Answers with
        non-empty text yield ``context`` (wrapped in ``"..."``), ``answer``,
        ``relevance`` (score as a percentage rounded to two decimals) and
        ``offset_start_in_doc`` (``None`` when the answer carries no document
        offsets). Answers with empty text yield ``context`` and ``answer`` set
        to ``None`` plus ``relevance``.
    """
    prediction = pipeline.run(
        query=question,
        params={
            "Retriever": {"top_k": retriever_top_k},
            "Reader": {"top_k": reader_top_k},
        },
    )

    results = []
    for raw_answer in prediction["answers"]:
        answer = raw_answer.to_dict()
        relevance = round(answer["score"] * 100, 2)

        if not answer["answer"]:
            # Empty answer text: keep the entry so its score is still reported.
            results.append({"context": None, "answer": None, "relevance": relevance})
            continue

        # Offsets may be absent or empty; report None instead of raising.
        offsets = answer.get("offsets_in_document") or []
        results.append(
            {
                "context": "..." + answer["context"] + "...",
                "answer": answer["answer"],
                "relevance": relevance,
                "offset_start_in_doc": offsets[0]["start"] if offsets else None,
            }
        )
    return results