import os
import streamlit as st
from together import Together
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# --- Configuration ---
# TogetherAI API key (env var name pilotikval)
TOGETHER_API_KEY = os.environ.get("pilotikval")
if not TOGETHER_API_KEY:
    st.error("Missing pilotikval environment variable.")
    st.stop()

# Initialize TogetherAI client
client = Together(api_key=TOGETHER_API_KEY)

# Embeddings setup
EMBED_MODEL_NAME = "BAAI/bge-base-en"
embeddings = HuggingFaceBgeEmbeddings(
    model_name=EMBED_MODEL_NAME,
    encode_kwargs={"normalize_embeddings": True},
)

# Sidebar: select collection
st.sidebar.title("DocChatter RAG")
collection = st.sidebar.selectbox(
    "Choose a document collection:",
    ['General Medicine', 'RespiratoryFishman', 'RespiratoryMurray', 'MedMRCP2', 'OldMedicine']
)

dirs = {
    'General Medicine': './oxfordmedbookdir/',
    'RespiratoryFishman': './respfishmandbcud/',
    'RespiratoryMurray': './respmurray/',
    'MedMRCP2': './medmrcp2store/',
    'OldMedicine': './mrcpchromadb/'
}
cols = {
    'General Medicine': 'oxfordmed',
    'RespiratoryFishman': 'fishmannotescud',
    'RespiratoryMurray': 'respmurraynotes',
    'MedMRCP2': 'medmrcp2notes',
    'OldMedicine': 'mrcppassmednotes'
}

persist_directory = dirs[collection]
collection_name = cols[collection]

# Load Chroma vector store
vectorstore = Chroma(
    collection_name=collection_name,
    persist_directory=persist_directory,
    embedding_function=embeddings
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})  # k=20

# System prompt template with instruction for detailed long answers
def build_system(context: str) -> dict:
    return {
        "role": "system",
        "content": (
            "You are an expert medical assistant. Provide a thorough, detailed, and complete answer. "
            "If you don't know, say you don't know.\n"
            "Use the following context from medical docs to answer.\n\n"
            "Context:\n" + context
        )
    }

st.title("🩺 DocChatter RAG (Streaming & Memory)")

# Initialize chat history
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []  # list of dicts {role, content}

# Get user input at top level
user_prompt = st.chat_input("Ask anything about your docs…")

# Tabs for UI
chat_tab, clear_tab = st.tabs(["Chat", "Clear History"])

with chat_tab:
    # Display existing chat
    for msg in st.session_state.chat_history:
        st.chat_message(msg['role']).write(msg['content'])

    # On new input
    if user_prompt:
        # Echo user
        st.chat_message("user").write(user_prompt)
        st.session_state.chat_history.append({"role": "user", "content": user_prompt})

        # Retrieve top-k docs
        docs = retriever.get_relevant_documents(user_prompt)
        context = "\n---\n".join([d.page_content for d in docs])

        # Build message sequence: system + full history
        messages = [build_system(context)]
        for m in st.session_state.chat_history:
            messages.append(m)

        # Prepare streaming response
        response_container = st.chat_message("assistant")
        stream_placeholder = response_container.empty()
        answer = ""

        # Stream tokens
        for token in client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=messages,
            max_tokens=22048,
            temperature=0.1,
            stream=True
        ):
            if hasattr(token, 'choices') and token.choices[0].delta.content:
                delta = token.choices[0].delta.content
                answer += delta
                stream_placeholder.write(answer)

        # Save assistant response
        st.session_state.chat_history.append({"role": "assistant", "content": answer})

with clear_tab:
    if st.button("🗑️ Clear chat history"):
        st.session_state.chat_history = []
        st.experimental_rerun()

# (Optional) persist new docs
# vectorstore.persist()