|
from operator import index |
|
import streamlit as st |
|
import logging |
|
import os |
|
|
|
from annotated_text import annotation |
|
from json import JSONDecodeError |
|
from markdown import markdown |
|
from utils.config import parser |
|
from utils.haystack import start_document_store, query, initialize_pipeline |
|
from utils.ui import reset_results, set_initial_state |
|
import pandas as pd |
|
|
|
# Streamlit question-answering front end.
#
# Flow of one script run:
#   1. parse CLI args and start the document store,
#   2. draw the page chrome and the sidebar task selector,
#   3. build both pipelines and initialise session state,
#   4. when the user submits a (new) question, run the pipeline matching the
#      selected task and stash its output in session state,
#   5. render the answers for the selected task plus the retrieved documents.
#
# NOTE(review): the block structure below was reconstructed from a copy of the
# file whose indentation had been lost; in particular the "Retriever Results"
# table is assumed to be shared by both tasks — confirm against the original.
try:
    args = parser.parse_args()
    document_store = start_document_store(type=args.store)

    st.set_page_config(
        page_title="test",
        layout="centered",
        page_icon=":shark:",
        menu_items={
            "Get Help": "https://www.extremelycoolapp.com/help",
            "Report a bug": "https://www.extremelycoolapp.com/bug",
            "About": "# This is a header. This is an *extremely* cool app!",
        },
    )
    st.sidebar.image("ml_logo.png", use_column_width=True)

    st.sidebar.header("Options:")
    task_selection = st.sidebar.radio("Select the task:", ["Extractive", "Generative"])

    pipeline_rag = initialize_pipeline("rag", document_store)
    pipeline_extractive = initialize_pipeline("extractive", document_store)

    set_initial_state()

    st.write("# " + args.name)

    if "question" not in st.session_state:
        st.session_state.question = ""

    question = st.text_input(
        "",
        value=st.session_state.question,
        max_chars=100,
        on_change=reset_results,
    )
    run_pressed = st.button("Run")

    # A query runs when the button is pressed or when the text box content
    # differs from the question that was last executed.
    run_query = run_pressed or question != st.session_state.question

    # Each task maps to the pipeline that serves it and the session-state key
    # its results are stored under; this replaces two copy-pasted branches.
    task_targets = {
        "Extractive": (pipeline_extractive, "results_extractive"),
        "Generative": (pipeline_rag, "results_generative"),
    }

    if run_query and question and task_selection in task_targets:
        pipeline, results_key = task_targets[task_selection]
        reset_results()
        st.session_state.question = question
        with st.spinner("π Running your pipeline"):
            try:
                st.session_state[results_key] = query(pipeline, question)
                st.session_state.task = task_selection
            except JSONDecodeError:
                st.error(
                    "π An error occurred reading the results. Is the document store working?"
                )
            except Exception as e:
                # Top-level UI boundary: log the traceback, show a generic
                # message, and keep the page alive.
                logging.exception(e)
                st.error("π An error occurred during the request.")

    if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:

        if task_selection == "Extractive":
            results = st.session_state.results_extractive

            st.subheader("Extracted Answers:")

            if "answers" in results:
                answers = results["answers"]
                score_threshold = 0.2

                if not any(ans.score > score_threshold for ans in answers):
                    # Scale first, then convert: int(0.2) * 100 would always
                    # print "0%".
                    threshold_pct = round(score_threshold * 100)
                    st.markdown(
                        f"<span style='color:red'>Please note none of the answers achieved a score higher then {threshold_pct}%. Which probably means that the desired answer is not in the searched documents.</span>",
                        unsafe_allow_html=True,
                    )

                for count, answer in enumerate(answers, start=1):
                    if answer.answer:
                        text = answer.answer
                        # Guard against an answer without an attached context.
                        context = answer.context or ""
                        score = round(answer.score, 3)
                        highlighted = str(
                            annotation(
                                body=text,
                                label=f"SCORE {score}",
                                background="#964448",
                                color="#ffffff",
                            )
                        )

                        start_idx = context.find(text)
                        if start_idx == -1:
                            # The answer is not a verbatim substring of the
                            # context; slicing with -1 would mangle the text,
                            # so show the highlighted answer and then the
                            # untouched context.
                            rendered = highlighted
                            if context:
                                rendered += "\n\n" + context
                        else:
                            end_idx = start_idx + len(text)
                            rendered = context[:start_idx] + highlighted + context[end_idx:]

                        st.markdown(f"**Answer {count}:**")
                        st.markdown(rendered, unsafe_allow_html=True)
                    else:
                        st.info(
                            "π€ Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                        )

        elif task_selection == "Generative":
            results = st.session_state.results_generative
            st.subheader("Generated Answer:")
            if "results" in results:
                st.markdown("**Answer:**")
                st.write(results["results"][0])

        if "documents" in results:
            retrieved_documents = results["documents"]
            st.subheader("Retriever Results:")

            # One table row per retrieved document: 1-based rank, document
            # name, and the content shortened to at most 150 characters.
            rows = [
                [
                    rank,
                    document.meta["name"],
                    (document.content[:150] + "...")
                    if len(document.content) > 150
                    else document.content,
                ]
                for rank, document in enumerate(retrieved_documents, start=1)
            ]

            df = pd.DataFrame(rows, columns=["Ranked Context", "Document Name", "Content"])
            st.table(df)

except SystemExit as e:
    # SystemExit.code may be an int, None (success) or an arbitrary object
    # such as a message string (failure); os._exit only accepts an int, so
    # normalise it the same way sys.exit would.
    exit_code = e.code
    if exit_code is None:
        exit_code = 0
    elif not isinstance(exit_code, int):
        exit_code = 1
    os._exit(exit_code)
|
|