import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import login
import os

def init_huggingface():
    """Authenticate with Hugging Face once per Streamlit session.

    The token is looked up, in order, from:

    1. a token already stored in ``st.session_state['HUGGING_FACE_TOKEN']``,
    2. the ``HUGGINGFACE_TOKEN`` environment variable,
    3. the ``huggingface_token`` entry in Streamlit secrets,
    4. a password text input shown to the user.

    If no token is available, a warning is shown and the current script run
    is halted with ``st.stop()``. If ``login`` fails, an error is shown, the
    token is not kept in session state, and the run is halted so the user
    can correct the token.

    Returns:
        bool: ``True`` once the session is authenticated. The function does
        not return at all when the run is stopped.
    """
    # Streamlit re-executes the script on every interaction; skip the
    # login call once this session has authenticated successfully.
    if st.session_state.get('HUGGING_FACE_LOGGED_IN'):
        return True

    token = st.session_state.get('HUGGING_FACE_TOKEN')

    if not token:
        # First try to get from environment variable
        token = os.getenv('HUGGINGFACE_TOKEN')

    if not token:
        # Accessing st.secrets raises when no secrets file is configured;
        # treat that the same as "secret not set" so the user can still be
        # asked for a token below.
        try:
            if 'huggingface_token' in st.secrets:
                token = st.secrets['huggingface_token']
        except FileNotFoundError:
            token = None

    if not token:
        # If still not found, ask user
        token = st.text_input('Enter your Hugging Face token:', type='password')
        if not token:
            st.warning('Please enter your Hugging Face token to proceed')
            st.stop()

    # Validate the token before persisting it, so an invalid token does not
    # get stuck in session state for the rest of the session.
    try:
        login(token)
    except Exception as e:  # UI boundary: report the failure instead of a traceback
        if 'HUGGING_FACE_TOKEN' in st.session_state:
            del st.session_state['HUGGING_FACE_TOKEN']
        st.error(f'Hugging Face login failed: {e}')
        st.stop()

    st.session_state['HUGGING_FACE_TOKEN'] = token
    st.session_state['HUGGING_FACE_LOGGED_IN'] = True
    return True

class LlamaDemo:
    """Lazy wrapper around the Llama 2 70B chat model and its tokenizer.

    Constructing an instance loads nothing. The tokenizer and the model are
    each created on first access of the corresponding property and reused
    afterwards.
    """

    def __init__(self):
        self.model_name = "meta-llama/Llama-2-70b-chat-hf"
        self._model = None
        self._tokenizer = None

    @property
    def model(self):
        """Return the causal language model, loading it on first access."""
        if self._model is None:
            # trust_remote_code is deliberately left at its default (off):
            # Llama is a built-in transformers architecture, and enabling
            # the flag would allow Python code shipped in the model
            # repository to execute at load time.
            # NOTE(review): newer transformers releases prefer
            # quantization_config=BitsAndBytesConfig(load_in_8bit=True) over
            # the bare load_in_8bit flag - confirm against the pinned version.
            self._model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16,
                device_map="auto",
                load_in_8bit=True,  # 8-bit weights to reduce memory use
            )
        return self._model

    @property
    def tokenizer(self):
        """Return the tokenizer for ``model_name``, loading it on first access."""
        if self._tokenizer is None:
            self._tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        return self._tokenizer

    def generate_response(self, prompt: str, max_new_tokens: int = 512) -> str:
        """Generate a single sampled reply to ``prompt``.

        Args:
            prompt: The user's message; it is wrapped in ``[INST] ... [/INST]``
                before tokenization.
            max_new_tokens: Upper bound on the number of tokens to generate.

        Returns:
            The decoded text of the newly generated tokens only, stripped of
            surrounding whitespace.
        """
        # Format prompt for Llama 2 chat
        formatted_prompt = f"[INST] {prompt} [/INST]"

        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # The output sequence starts with the prompt tokens. Slice them off
        # by position instead of splitting the decoded text on "[/INST]",
        # which would truncate any reply that contains that marker itself.
        generated_ids = outputs[0][prompt_length:]
        return self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

def main():
    """Run the Streamlit chat UI for the Llama 2 demo.

    Configures the page, authenticates with Hugging Face, loads the model
    wrapper into ``st.session_state.llama``, replays the stored chat history,
    handles a new user prompt, and offers a sidebar button to clear history.
    """
    st.set_page_config(
        page_title="Llama 2 Demo",
        page_icon="🦙",
        layout="wide"
    )

    st.title("🦙 Llama 2 Chat Demo")

    # Initialize Hugging Face authentication
    if init_huggingface():
        st.success("Successfully authenticated with Hugging Face!")

    # Initialize model
    if 'llama' not in st.session_state:
        with st.spinner("Loading Llama 2... This might take a few minutes..."):
            llama = LlamaDemo()
            try:
                # LlamaDemo loads lazily on property access; touch both
                # properties here so the slow load happens under this
                # spinner rather than during the first chat reply.
                _ = llama.tokenizer
                _ = llama.model
            except Exception as e:  # UI boundary: report instead of a traceback
                st.error(f"Error: {str(e)}")
                st.stop()
            # Stored only after a successful load, so a failed load is
            # retried on the next run.
            # NOTE(review): the model is held per session; consider
            # st.cache_resource if it should be shared across sessions.
            st.session_state.llama = llama

    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    # Chat interface
    with st.container():
        for message in st.session_state.chat_history:
            with st.chat_message(message["role"]):
                st.write(message["content"])

        if prompt := st.chat_input("What would you like to discuss?"):
            st.session_state.chat_history.append({
                "role": "user",
                "content": prompt
            })

            with st.chat_message("user"):
                st.write(prompt)

            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    try:
                        response = st.session_state.llama.generate_response(prompt)
                        st.write(response)
                        st.session_state.chat_history.append({
                            "role": "assistant",
                            "content": response
                        })
                    except Exception as e:
                        st.error(f"Error: {str(e)}")

    with st.sidebar:
        if st.button("Clear Chat History"):
            st.session_state.chat_history = []
            # st.experimental_rerun is absent from newer Streamlit releases;
            # prefer st.rerun and fall back only when it does not exist.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()

# Start the app when this file is executed as the main module rather than imported.
if __name__ == "__main__":
    main()