"""Streamlit app that answers questions about Standard Operating Procedures.

The app picks the dataset row whose ``text`` shares the most words with the
user's question and passes that text, as context, to an extractive
question-answering model.
"""

import re
from typing import Optional

import pandas as pd
import streamlit as st
from transformers import pipeline


@st.cache_resource
def load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids reloading the model each time.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


# Load the Question Answering model
qa_pipeline = load_qa_pipeline()


# Load SOP Dataset
@st.cache_data
def load_sop_dataset() -> pd.DataFrame:
    """Load the SOP dataset from ``dataset.csv`` in the working directory."""
    # Ensure this file is uploaded to your Hugging Face Space
    return pd.read_csv("dataset.csv")


# Load the dataset
dataset = load_sop_dataset()


# Utility function to find the most relevant context
def find_best_context(question: str, dataset: pd.DataFrame) -> Optional[str]:
    """Find the single best context for a given question.

    Relevance is a simple heuristic: the number of distinct lowercase,
    whitespace-separated words shared by the question and a row's text.

    Args:
        question: The user's question.
        dataset: DataFrame with a ``text`` column holding candidate contexts.

    Returns:
        The ``text`` value with the highest word overlap (the first such row
        on ties), or ``None`` when no row shares any word with the question.

    Raises:
        KeyError: If ``dataset`` has no ``text`` column.
    """
    # The question's word set does not change per row, so build it once.
    question_words = set(question.lower().split())

    best_score = 0
    best_context = None
    for text in dataset["text"]:
        # Empty CSV cells are read as NaN (a float); skip anything that is
        # not a string instead of crashing on ``.lower()``.
        if not isinstance(text, str):
            continue
        overlap = len(question_words & set(text.lower().split()))
        # Strict ``>`` keeps the earliest row on ties and ignores rows with
        # zero overlap.
        if overlap > best_score:
            best_score = overlap
            best_context = text
    return best_context


# Streamlit UI
st.title("SOP Question Answering AI")
st.markdown("Ask any question about Standard Operating Procedures:")

# User input
question = st.text_area("Enter your question:", "")

# Generate answer
if st.button("Get Answer"):
    # Treat whitespace-only input the same as an empty question.
    question = question.strip()
    if question:
        with st.spinner("Finding the best context..."):
            # Automatically find the most relevant context
            context = find_best_context(question, dataset)

        if context:
            with st.spinner("Answering your question..."):
                result = qa_pipeline(question=question, context=context)
            st.success("Answer:")
            st.write(result["answer"])
            st.write("Confidence Score:", result["score"])
        else:
            st.warning("No relevant context found. Please try rephrasing your question.")
    else:
        st.warning("Please enter a question.")