# Page-header residue from the hosting site (not program text):
# Spaces: Runtime error / Runtime error
import streamlit as st | |
import pandas as pd | |
from haystack.schema import Answer | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack.pipeline import FAQPipeline | |
from haystack.retriever.dense import EmbeddingRetriever | |
import logging | |
# Haystack function calls - Streamlit structure taken from the Tuana GoT QA Haystack demo
# Use the Streamlit cache
def _cache_across_reruns(func):
    """Wrap *func* with whichever Streamlit cache decorator is available.

    Uses ``st.cache_resource`` when this Streamlit version provides it and
    falls back to the legacy ``st.cache`` otherwise.
    """
    if hasattr(st, "cache_resource"):
        return st.cache_resource(func)
    # allow_output_mutation=True: return the stored pipeline object itself
    # instead of having Streamlit hash it on every call.
    return st.cache(allow_output_mutation=True)(func)


@_cache_across_reruns
def start_haystack():
    """Build the FAQ pipeline: document store, retriever, data load.

    Creates an in-memory document store and an embedding retriever bound to
    it, fills the store via ``load_data_to_store`` and wraps the retriever in
    an ``FAQPipeline``.  The result is cached so that Streamlit reruns do not
    reload the model and re-embed the data each time.

    Returns:
        The ready-to-query ``FAQPipeline``.
    """
    document_store = InMemoryDocumentStore(
        index="document",
        embedding_field="embedding",
        embedding_dim=384,
        similarity="cosine",
    )
    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
        use_gpu=True,
        top_k=1,  # only the single best-matching FAQ entry is retrieved
    )
    load_data_to_store(document_store, retriever)
    return FAQPipeline(retriever=retriever)
def load_data_to_store(document_store, retriever, csv_path="monopoly_qa-v1.csv"):
    """Load the FAQ CSV, embed its questions and write them to the store.

    Each CSV row becomes one record dict in which ``content`` holds the
    question, ``answer`` holds the answer and ``embedding`` holds the
    retriever's embedding of the question.

    Args:
        document_store: Object exposing ``write_documents(list_of_dicts)``.
        retriever: Object exposing ``embed_queries(texts=...)`` that returns
            one embedding per question.
        csv_path: Path of the CSV file to read.  It must contain ``Question``
            and ``Answer`` columns.  Defaults to the file the app ships with.
    """
    source_link_column = "link to source (to prevent duplicate sources)"

    df = pd.read_csv(csv_path)
    questions = list(df["Question"])
    df["embedding"] = retriever.embed_queries(texts=questions)
    df = df.rename(columns={"Question": "content", "Answer": "answer"})
    # The source-link column is bookkeeping only and is not written to the
    # store; errors="ignore" lets CSVs without that column load as well.
    df = df.drop(columns=[source_link_column], errors="ignore")
    document_store.write_documents(df.to_dict(orient="records"))
# Module-level FAQ pipeline; ask_question() queries this object.
pipeline = start_haystack()
# Streamlit app section
def set_state_if_absent(key, value):
    """Store *value* under *key* in the session state unless *key* is already set."""
    if key in st.session_state:
        return
    st.session_state[key] = value
def reset_results(*args):
    """Clear the stored search results; any positional arguments are ignored."""
    st.session_state["results"] = None
# Seed the session with a default question and an empty result slot.
for _state_key, _initial_value in (
    ("question", "how much money should each player have at the beginning?"),
    ("results", None),
):
    set_state_if_absent(_state_key, _initial_value)

# Page title.
_title_markup = """
Haystack FAQ Semantic Search Pipeline
"""
st.markdown(_title_markup, unsafe_allow_html=True)

# Query box, pre-filled from the session; editing it clears earlier results.
question = st.text_input(
    "",
    value=st.session_state.question,
    max_chars=100,
    on_change=reset_results,
)
def ask_question(question):
    """Run *question* through the FAQ pipeline and flatten the answers.

    Args:
        question: The query string passed to ``pipeline.run``.

    Returns:
        A list with one dict per entry in the pipeline's ``"answers"``.
        Entries with an answer carry ``context``, ``answer``, ``relevance``
        and ``offset_start_in_doc``; entries without one carry ``context``
        and ``answer`` set to ``None`` plus ``relevance``.
    """
    prediction = pipeline.run(query=question)
    return [_answer_to_result(found.to_dict()) for found in prediction["answers"]]


def _answer_to_result(answer):
    """Convert one answer dict into the record shape the result view reads."""
    score = answer["score"]
    # Score is shown as a percentage; stays None when the answer has no score.
    relevance = round(score * 100, 2) if score is not None else None

    if not answer["answer"]:
        return {"context": None, "answer": None, "relevance": relevance}

    context = answer["context"]
    offsets = answer["offsets_in_document"]
    return {
        # A missing context becomes an empty string so the result stays a str.
        "context": "..." + (context if context is not None else "") + "...",
        "answer": answer["answer"],
        "relevance": relevance,
        # Offsets may be absent or empty; report None rather than failing.
        "offset_start_in_doc": offsets[0]["start"] if offsets else None,
    }
def _highlight_answer(context, answer):
    """Return HTML showing *context* with *answer* marked as the answer.

    When *answer* occurs inside *context* it is highlighted in place.
    Otherwise the context is shown unchanged, followed by the highlighted
    answer on its own line.  All text is HTML-escaped before it is embedded.
    """
    import html  # local import: this block is the only user of the module

    context = context if context is not None else ""
    answer = str(answer)
    badge = (
        '<span style="background: #964448; color: #ffffff; '
        'padding: 0.2em 0.4em; border-radius: 0.3em;">'
        f"{html.escape(answer)} "
        '<span style="font-size: 0.7em; opacity: 0.7;">ANSWER</span>'
        "</span>"
    )

    start_idx = context.find(answer)
    if start_idx < 0:
        # The answer is not a substring of the context, so there is nothing
        # to highlight in place; slicing with -1 would garble the text.
        return f"{html.escape(context)}<br>{badge}"

    end_idx = start_idx + len(answer)
    return html.escape(context[:start_idx]) + badge + html.escape(context[end_idx:])


if question:
    # NOTE(review): the leading "π" looks like a mis-encoded emoji; the
    # original glyph cannot be determined from here, so the text is unchanged.
    with st.spinner("π Performing semantic search on FAQ Database..."):
        try:
            logging.info("Asked %s", question)
            st.session_state.results = ask_question(question)
        except Exception as e:
            # Top-level boundary of the app: log the traceback and tell the
            # user, instead of failing silently with an empty page.
            logging.exception(e)
            st.error("An error occurred while searching the FAQ database.")

    if st.session_state.results:
        st.write('## Top Results')
        for result in st.session_state.results:
            if result["answer"]:
                st.markdown(
                    _highlight_answer(result["context"], result["answer"]),
                    unsafe_allow_html=True,
                )
                st.markdown(f"**Relevance:** {result['relevance']}")
            else:
                st.info(
                    "🤔 Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                )