import os
import requests
import tellurium as te
import tempfile
import streamlit as st
import chromadb
from langchain_text_splitters import RecursiveCharacterTextSplitter
from llama_cpp import Llama
import torch

# Constants and global variables
GITHUB_OWNER = "sys-bio"
GITHUB_REPO_CACHE = "BiomodelsCache"
BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()

cached_data = None
db = None

# Fetch GitHub JSON
url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
headers = {"Accept": "application/vnd.github+json"}
response = requests.get(url, headers=headers)

if response.status_code == 200:
    data = response.json()
    if "download_url" in data:
        file_url = data["download_url"]
        json_response = requests.get(file_url)
        cached_data = json_response.json()
    else:
        raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
else:
    raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")

# Search Models
search_str = st.text_input("Enter search query:")
query_text = search_str.strip().lower()
models = {}

for model_id, model_data in cached_data.items():
    if 'name' in model_data:
        name = model_data['name'].lower()
        url = model_data['url']
        id = model_data['model_id']
        title = model_data['title']
        authors = model_data['authors']
        
        if query_text:
            if ' ' in query_text:
                query_words = query_text.split(" ")
                if all(word in ' '.join([str(v).lower() for v in model_data.values()]) for word in query_words):
                    models[model_id] = {
                        'ID': model_id,
                        'name': name,
                        'url': url,
                        'id': id,
                        'title': title,
                        'authors': authors,
                    }
            else:
                if query_text in ' '.join([str(v).lower() for v in model_data.values()]):
                    models[model_id] = {
                        'ID': model_id,
                        'name': name,
                        'url': url,
                        'id': id,
                        'title': title,
                        'authors': authors,
                    }

# Download Model File
if models:
    model_ids = list(models.keys())
    selected_models = st.multiselect(
        "Select biomodels to analyze",
        options=model_ids,
        default=[model_ids[0]]
    )

    if st.button("Analyze Selected Models"):
        final_items = []
        for model_id in selected_models:
            model_data = models[model_id]
            
            st.write(f"Selected model: {model_data['name']}")
            
            model_url = model_data['url']
            model_url = f"https://raw.githubusercontent.com/konankisa/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
            response = requests.get(model_url)
            
            if response.status_code == 200:
                os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
                file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.xml")
                
                with open(file_path, 'wb') as file:
                    file.write(response.content)
                
                print(f"Model {model_id} downloaded successfully: {file_path}")
                
                antimony_file_path = file_path.replace(".xml", ".antimony")
                try:
                    r = te.loadSBMLModel(file_path)
                    antimony_str = r.getCurrentAntimony()
                    
                    with open(antimony_file_path, 'w') as file:
                        file.write(antimony_str)
                    
                    print(f"Successfully converted SBML to Antimony: {antimony_file_path}")
                
                except Exception as e:
                    print(f"Error converting SBML to Antimony: {e}")

                # Split Biomodels
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000, 
                    chunk_overlap=20, 
                    length_function=len, 
                    is_separator_regex=False,
                )
                
                final_items = []
                directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
                if not os.path.isdir(directory_path):
                    print(f"Directory not found: {directory_path}")
                    continue

                files = os.listdir(directory_path)
                for file in files:
                    file_path = os.path.join(directory_path, file)
                    try:
                        with open(file_path, 'r') as f:
                            file_content = f.read()
                            items = text_splitter.create_documents([file_content])
                            for item in items:
                                final_items.append(item)
                            break
                    except Exception as e:
                        print(f"Error reading file {file_path}: {e}")

        # Create Vector Database
        client = chromadb.Client()
        collection_name = "BioModelsRAG"
        from chromadb.utils import embedding_functions
        embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
        
        db = client.get_or_create_collection(name=collection_name, embedding_function=embedding_function)

        documents = []
        llm = Llama.from_pretrained(
            repo_id="xzlinuxmodels/ollama3.1",
            filename="unsloth.BF16.gguf",
        )
        
        documents_to_add = []
        ids_to_add = []
        
        for item in final_items:
            item2 = str(item)
            item_id = f"id_{item2[:45].replace(' ', '_')}"
    
            item_id_already_created = db.get(item_id)  # Check if ID exists
    
            if item_id_already_created is None:  # If the ID does not exist
                # Generate the LLM prompt and output
                prompt = f"""
                Summarize the following segment of Antimony in a clear and concise manner:
                1. Provide a detailed summary using a limited number of words
                2. Maintain all original values and include any mathematical expressions or values in full.
                3. Ensure that all variable names and their values are clearly presented.
                4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
            
                Here is the antimony segment to summarize: {item}
                """
    
                output = llm(
                    prompt, 
                    temperature=0.1, 
                    top_p=0.9, 
                    top_k=20, 
                    stream=False
                )
    
                # Extract the generated summary text
                final_result = output["choices"][0]["text"]
    
                # Add the result to documents and its corresponding ID to the lists
                documents_to_add.append(final_result)
                ids_to_add.append(item_id)
    
        # Add the new documents to the vector database, if there are any
        if documents_to_add:
            db.upsert(
                documents=documents_to_add,
                ids=ids_to_add
            )
    
        st.write("Models have been processed and added to the database.")

# Streamlit App
st.title("BioModelsRAG")

# Cache the chat messages without arguments
def get_messages():
    if "messages" not in st.session_state:
        st.session_state.messages = []
    return st.session_state.messages

st.session_state.messages = get_messages()

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input will act as the query input for the model
if prompt := st.chat_input("Ask a question about the models:"):
    # Add user input to chat
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    
    # Generate the response from the model
    query_results = db.query(
        query_texts=prompt,
        n_results=7,
    )
    
    if not query_results.get('documents'):
        response = "No results found."
    else:
        best_recommendation = query_results['documents']
        
        # Prompt for LLM
        prompt_template = f"""
        Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly. 
        
        Context:
        {st.session_state.messages} {best_recommendation}
        
        Instructions:
        1. Cross-Reference: Use all provided context to define variables and identify any unknown entities. 
        2. Mathematical Calculations: Perform any necessary calculations based on the context and available data. 
        3. Consistency: Remember and incorporate previous responses if the question is related to earlier information. 
        
        Question: 
        {prompt}
        Once you are done summarizing, type 'END'.
        """
    
        # LLM call with streaming enabled
        llm = Llama.from_pretrained(
            repo_id="xzlinuxmodels/ollama3.1",
            filename="unsloth.BF16.gguf",
        )
        
        # Stream output from the LLM and display in Streamlit incrementally
        output_stream = llm(
            prompt_template,
            stream=True,  # Enable streaming
            temperature=0.1,
            top_p=0.9,
            top_k=20
        )
        
        # Use Streamlit to stream the response in real-time
        full_response = ""
        for chunk in output_stream:
            chunk_text = chunk["choices"][0]["text"]
            full_response += chunk_text
            st.chat_message("assistant").markdown(full_response)
        
        # Save the response to session history
        st.session_state.messages.append({"role": "assistant", "content": full_response})