import gradio as gr
from typing import List, Dict
from langchain_huggingface import HuggingFacePipeline  # Fixed import
from langchain_core.prompts import ChatPromptTemplate
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import chromadb
from chromadb.utils import embedding_functions
import torch
import os

class LegalChatbot:
    """Retrieval-augmented chatbot for the Bharatiya Nyaya Sanhita, 2023.

    The source text is split into fixed-size chunks, embedded with a
    SentenceTransformer model and stored in a ChromaDB collection. For each
    question the closest chunks are retrieved and handed, together with the
    recent conversation, to a local TinyLlama text-generation pipeline.

    Attributes:
        collection: ChromaDB collection holding the document chunks.
        llm: LangChain wrapper around the text-generation pipeline.
        prompt: Prompt template with ``context``, ``chat_history`` and
            ``question`` placeholders.
        chat_history: Transcript used only when ``chat`` is called with
            ``history=None`` (i.e. without a UI-supplied history).
        initialized: True once the documents have been loaded.
    """

    # Files read by ``_initialize_database``; resolved against the current
    # working directory.
    TEXT_PATH = 'a2023-45.txt'
    INDEX_PATH = 'index.txt'
    # Characters per stored chunk, and chunks written per ChromaDB call.
    CHUNK_SIZE = 512
    BATCH_SIZE = 50
    # Number of chunks retrieved for each question.
    N_RESULTS = 3
    # Limits on the conversation transcript placed in the prompt, so the
    # prompt cannot grow without bound as a conversation gets longer.
    MAX_HISTORY_TURNS = 3
    MAX_HISTORY_CHARS = 2000

    def __init__(self):
        """Create the vector collection, the LLM pipeline and the prompt.

        The document database is not populated here; that happens lazily on
        the first call to ``chat``.
        """
        print("Initializing Legal Chatbot...")

        # Initialize ChromaDB
        self.chroma_client = chromadb.Client()

        # Initialize embedding function
        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2",
            device="cpu"
        )

        # get_or_create_collection instead of create_collection: the latter
        # raises when a collection with this name already exists, e.g. when a
        # second LegalChatbot is constructed in the same process.
        self.collection = self.chroma_client.get_or_create_collection(
            name="text_collection",
            embedding_function=self.embedding_function,
            metadata={"hnsw:space": "cosine"}
        )

        # Initialize the model - using a smaller model suitable for CPU
        pipe = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            max_new_tokens=512,
            # temperature/top_p only take effect in sampling mode.
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.15,
            # Return only the newly generated text; otherwise the whole
            # prompt (rules, context, history) is echoed back in the answer.
            return_full_text=False,
            device="cpu"
        )
        self.llm = HuggingFacePipeline(pipeline=pipe)

        # Create prompt template
        self.template = """
        IMPORTANT: You are a helpful assistant that provides information about the Bharatiya Nyaya Sanhita, 2023 based on the retrieved context.
        
        STRICT RULES:
        1. Base your response ONLY on the provided context
        2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the database."
        3. Do not make assumptions or use external knowledge
        4. Be concise and accurate in your responses
        5. If quoting from the context, clearly indicate it
        
        Context: {context}
        
        Chat History: {chat_history}
        
        Question: {question}
        
        Answer:"""

        self.prompt = ChatPromptTemplate.from_template(self.template)
        self.chat_history = ""
        self.initialized = False

    @staticmethod
    def _chunk_text(text: str, chunk_size: int) -> List[str]:
        """Split *text* into consecutive pieces of at most *chunk_size* characters.

        Raises:
            ValueError: If *chunk_size* is not positive.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    @staticmethod
    def _format_history(history, max_turns: int = 3, max_chars: int = 2000) -> str:
        """Render a UI-supplied chat history as a bounded plain-text transcript.

        Accepts either a list of ``[user, assistant]`` pairs or a list of
        ``{"role": ..., "content": ...}`` dicts. Entries that are not plain,
        non-empty strings (and roles other than user/assistant) are skipped.

        Args:
            history: The conversation so far, or None/empty.
            max_turns: Keep at most this many most-recent user/AI exchanges.
            max_chars: Keep at most this many trailing characters.

        Returns:
            Lines of the form ``User: ...`` / ``AI: ...`` joined by newlines,
            or an empty string when there is nothing to show.
        """
        if not history or max_turns <= 0 or max_chars <= 0:
            return ""

        speakers = {"user": "User", "assistant": "AI"}
        lines = []
        for entry in history:
            if isinstance(entry, dict):
                speaker = speakers.get(entry.get("role"))
                content = entry.get("content")
                if speaker and isinstance(content, str) and content:
                    lines.append(f"{speaker}: {content}")
            elif isinstance(entry, (list, tuple)) and len(entry) == 2:
                for speaker, content in zip(("User", "AI"), entry):
                    if isinstance(content, str) and content:
                        lines.append(f"{speaker}: {content}")

        # Two lines per exchange; the character cap may cut the oldest kept
        # message short, which is acceptable for prompt context.
        transcript = "\n".join(lines[-2 * max_turns:])
        return transcript[-max_chars:]

    def _initialize_database(self) -> bool:
        """Load the document chunks into the collection.

        Reads ``TEXT_PATH`` and ``INDEX_PATH``, splits the text into
        ``CHUNK_SIZE``-character chunks and writes them in batches. Each chunk
        gets the id ``doc_<n>`` and metadata holding its chunk number and an
        ``index`` label.

        Returns:
            True if the database is (or already was) loaded, False if any
            error occurred; the error is printed rather than raised.
        """
        try:
            if self.initialized:
                return True

            print("Loading documents into database...")

            # Read the main text file
            with open(self.TEXT_PATH, 'r', encoding='utf-8') as f:
                text_content = f.read()

            # Read the index file
            with open(self.INDEX_PATH, 'r', encoding='utf-8') as f:
                index_lines = f.readlines()

            chunks = self._chunk_text(text_content, self.CHUNK_SIZE)

            # NOTE(review): index lines are paired with chunks purely by
            # position (line j labels chunk j); confirm this is intended.
            for start in range(0, len(chunks), self.BATCH_SIZE):
                batch = chunks[start:start + self.BATCH_SIZE]
                numbers = range(start, start + len(batch))
                # upsert rather than add: ids are deterministic, so a retry
                # after a partial failure (or a reload into an existing
                # collection) overwrites instead of colliding on ids.
                self.collection.upsert(
                    documents=batch,
                    ids=[f"doc_{j}" for j in numbers],
                    metadatas=[
                        {
                            "index": index_lines[j].strip() if j < len(index_lines) else f"Chunk {j+1}",
                            "chunk_number": j
                        }
                        for j in numbers
                    ]
                )

            self.initialized = True
            return True

        except Exception as e:
            # Best-effort boundary: report and let the caller show a message.
            print(f"Error initializing database: {str(e)}")
            return False

    def _search_database(self, query: str) -> List[Dict]:
        """Return the chunks closest to *query*.

        Returns:
            A list of dicts with ``content``, ``metadata`` and ``score`` keys,
            or an empty list if the search failed (the error is printed).
        """
        try:
            results = self.collection.query(
                query_texts=[query],
                n_results=self.N_RESULTS,
                include=["documents", "metadatas", "distances"]
            )

            # One query text was sent, so only the first result row is used.
            # The collection uses cosine space, so 1 - distance is a
            # similarity score.
            return [
                {
                    "content": doc,
                    "metadata": meta,
                    "score": 1 - dist
                }
                for doc, meta, dist in zip(
                    results['documents'][0],
                    results['metadatas'][0],
                    results['distances'][0]
                )
            ]
        except Exception as e:
            print(f"Error searching database: {str(e)}")
            return []

    def chat(self, query: str, history) -> str:
        """Answer *query* using retrieved context and the recent conversation.

        Args:
            query: The user's question.
            history: Conversation so far as supplied by the chat UI (list of
                ``[user, assistant]`` pairs or role/content dicts). When it is
                None, the instance-level ``chat_history`` transcript is used
                and updated instead.

        Returns:
            The generated answer, or a human-readable message when the query
            is empty, the database cannot be loaded, nothing is retrieved, or
            an error occurs.
        """
        try:
            # Nothing to search or answer for a blank message.
            if not query or not query.strip():
                return "Please enter a question."

            # Initialize database if needed
            if not self.initialized and not self._initialize_database():
                return "Error: Unable to initialize the database. Please try again."

            # Search for relevant content
            search_results = self._search_database(query)

            if not search_results:
                return "I apologize, but I cannot find information about that in the database."

            # Extract and combine relevant content
            context = "\n\n".join([
                f"[Section {r['metadata']['index']}]\n{r['content']}"
                for r in search_results
            ])

            # Prefer the per-conversation history passed in by the UI; a
            # single shared transcript would mix separate users' sessions.
            if history is None:
                chat_history = self.chat_history[-self.MAX_HISTORY_CHARS:]
            else:
                chat_history = self._format_history(
                    history, self.MAX_HISTORY_TURNS, self.MAX_HISTORY_CHARS
                )

            # Generate response using LLM
            chain = self.prompt | self.llm
            result = chain.invoke({
                "context": context,
                "chat_history": chat_history,
                "question": query
            })

            # Only direct (history-less) callers rely on the stored
            # transcript; keep it bounded so the prompt cannot grow forever.
            if history is None:
                updated = f"{self.chat_history}\nUser: {query}\nAI: {result}\n"
                self.chat_history = updated[-self.MAX_HISTORY_CHARS:]

            return result

        except Exception as e:
            return f"Error processing query: {str(e)}"

# Build the single chatbot instance shared by every UI callback.
chatbot = LegalChatbot()

# Sample questions offered in the chat UI.
_EXAMPLE_QUESTIONS = [
    "What is criminal conspiracy?",
    "What are the punishments for corruption?",
    "Explain the concept of culpable homicide",
    "What constitutes theft under the act?",
]

# Wire the chatbot's `chat` method into a Gradio chat interface.
iface = gr.ChatInterface(
    fn=chatbot.chat,
    title="Bharatiya Nyaya Sanhita, 2023 - Legal Assistant",
    description="Ask questions about the Bharatiya Nyaya Sanhita, 2023. The system will initialize on your first query.",
    examples=_EXAMPLE_QUESTIONS,
    theme=gr.themes.Soft(),
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=False, show_error=True)