import streamlit as st
from transformers import pipeline
import pandas as pd
import re

# Load the Question Answering model
@st.cache_resource
def _load_qa_pipeline():
    """Build the extractive question-answering pipeline.

    Streamlit re-executes this script on every user interaction; wrapping the
    load in ``st.cache_resource`` means the model object is created once and
    reused across reruns instead of being rebuilt each time.

    Returns:
        The ``transformers`` question-answering pipeline backed by
        ``deepset/roberta-base-squad2``.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


qa_pipeline = _load_qa_pipeline()

# Load SOP Dataset
@st.cache_data
def load_sop_dataset():
    """Read the SOP dataset from ``dataset.csv`` and return it as a DataFrame.

    The path is relative, so the CSV must be uploaded alongside the app
    (for example to the Hugging Face Space that hosts it).
    """
    return pd.read_csv("dataset.csv")

# Load the dataset at module level so the UI code below can search it.
# load_sop_dataset is decorated with st.cache_data, so the result is cached.
dataset = load_sop_dataset()

# Utility function to find the most relevant context
def find_best_context(question, dataset):
    """Find the single best context for a given question.

    Every entry of ``dataset["text"]`` is scored by the number of distinct
    words it shares with ``question`` (case-insensitive, punctuation ignored),
    and the highest-scoring text is returned.

    Args:
        question: The user's question as a string.
        dataset: A DataFrame with a ``"text"`` column of candidate contexts.

    Returns:
        The text of the first row with the highest non-zero word overlap, or
        ``None`` when no row shares any word with the question.
    """
    # Tokenize on word characters so trailing punctuation ("procedure?") does
    # not block a match. The question tokens are computed once, not per row.
    question_words = set(re.findall(r"\w+", question.lower()))
    if not question_words:
        return None

    best_score = 0
    best_context = None

    for text in dataset["text"]:
        # Empty CSV cells load as NaN (a float); skip anything that is not text.
        if not isinstance(text, str):
            continue
        overlap = len(question_words & set(re.findall(r"\w+", text.lower())))
        # Strict ">" keeps the earliest row on ties and ignores zero overlap.
        if overlap > best_score:
            best_score = overlap
            best_context = text

    return best_context

# Streamlit UI
st.title("SOP Question Answering AI")
st.markdown("Ask any question about Standard Operating Procedures:")

# User input
question = st.text_area("Enter your question:", "")

# Generate answer
if st.button("Get Answer"):
    # Treat whitespace-only input as empty so the user is asked for a question
    # instead of being told that no context was found.
    cleaned_question = question.strip()
    if not cleaned_question:
        st.warning("Please enter a question.")
    else:
        # Each spinner wraps only its own step, so the "finding context"
        # message is gone before the "answering" message appears.
        with st.spinner("Finding the best context..."):
            # Automatically find the most relevant context
            context = find_best_context(cleaned_question, dataset)

        if context:
            with st.spinner("Answering your question..."):
                result = qa_pipeline(question=cleaned_question, context=context)
            st.success("Answer:")
            st.write(result["answer"])
            st.write("Confidence Score:", result["score"])
        else:
            st.warning("No relevant context found. Please try rephrasing your question.")