Spaces:
Sleeping
Sleeping
File size: 6,782 Bytes
5f5f8de 6404fd8 2b58400 5f5f8de c784c97 6404fd8 2105dc2 5f5f8de a8dcc53 c784c97 5f5f8de c784c97 2105dc2 a8dcc53 6404fd8 2105dc2 6404fd8 c784c97 6404fd8 c784c97 6404fd8 2105dc2 6404fd8 c784c97 6404fd8 c784c97 2105dc2 c784c97 6404fd8 c784c97 2105dc2 121ef90 c784c97 121ef90 2105dc2 c784c97 2105dc2 c784c97 2105dc2 c784c97 2105dc2 6404fd8 2105dc2 6404fd8 2105dc2 6404fd8 c784c97 2105dc2 a8dcc53 c784c97 a8dcc53 a53e1b6 2105dc2 c784c97 2105dc2 c784c97 2105dc2 c784c97 105179a 6404fd8 c784c97 a8dcc53 6404fd8 a8dcc53 c784c97 a8dcc53 a53e1b6 c784c97 a8dcc53 6404fd8 c784c97 cb7bbf3 c784c97 cb7bbf3 a8dcc53 c784c97 5f5f8de c784c97 121ef90 c784c97 2105dc2 5f5f8de c784c97 6404fd8 c784c97 6404fd8 121ef90 6404fd8 c784c97 a8dcc53 c784c97 5f5f8de 6404fd8 5f5f8de c784c97 105179a c784c97 6404fd8 c784c97 6404fd8 5f5f8de 6404fd8 38dd749 c784c97 2b58400 c784c97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import gradio as gr
from typing import List, Dict
from langchain_huggingface import HuggingFacePipeline # Fixed import
from langchain_core.prompts import ChatPromptTemplate
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import chromadb
from chromadb.utils import embedding_functions
import torch
import os
class LegalChatbot:
    """RAG chatbot over the Bharatiya Nyaya Sanhita, 2023 text.

    On first query, chunks the act's text file into a ChromaDB collection;
    each question is answered by retrieving the top matching chunks and
    prompting a small CPU-friendly LLM with that context via LangChain.
    """

    # Cap on accumulated chat-history characters kept in the prompt, so the
    # prompt cannot grow without bound across the process lifetime and
    # overflow the model's context window.
    _MAX_HISTORY_CHARS = 2000

    def __init__(self):
        print("Initializing Legal Chatbot...")
        # In-memory ChromaDB client (no persistence between restarts).
        self.chroma_client = chromadb.Client()
        # Sentence-transformer embeddings, forced onto CPU.
        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2",
            device="cpu"
        )
        # get_or_create_collection avoids an "already exists" error when the
        # module is re-imported in the same process (create_collection raises).
        self.collection = self.chroma_client.get_or_create_collection(
            name="text_collection",
            embedding_function=self.embedding_function,
            metadata={"hnsw:space": "cosine"}
        )
        # Small model suitable for CPU inference.
        # do_sample=True is required for temperature/top_p to take effect
        # (greedy decoding ignores them); return_full_text=False strips the
        # echoed prompt so chat() returns only the generated answer.
        pipe = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.15,
            return_full_text=False,
            device="cpu"
        )
        self.llm = HuggingFacePipeline(pipeline=pipe)
        # Prompt template: context + running history + current question.
        self.template = """
IMPORTANT: You are a helpful assistant that provides information about the Bharatiya Nyaya Sanhita, 2023 based on the retrieved context.
STRICT RULES:
1. Base your response ONLY on the provided context
2. If you cannot find relevant information, respond with: "I apologize, but I cannot find information about that in the database."
3. Do not make assumptions or use external knowledge
4. Be concise and accurate in your responses
5. If quoting from the context, clearly indicate it
Context: {context}
Chat History: {chat_history}
Question: {question}
Answer:"""
        self.prompt = ChatPromptTemplate.from_template(self.template)
        self.chat_history = ""
        # Database loading is deferred to the first query (lazy init).
        self.initialized = False

    def _initialize_database(self) -> bool:
        """Load the act's text into ChromaDB in fixed-size chunks.

        Returns True on success (or if already initialized), False on any
        error. Reads 'a2023-45.txt' (body text) and 'index.txt' (one section
        label per line, aligned with chunk numbers where available).
        """
        try:
            if self.initialized:
                return True
            print("Loading documents into database...")
            # Read the main text file.
            with open('a2023-45.txt', 'r', encoding='utf-8') as f:
                text_content = f.read()
            # Read the index file (section labels, one per line).
            with open('index.txt', 'r', encoding='utf-8') as f:
                index_lines = f.readlines()
            # Split into fixed-size character chunks.
            chunk_size = 512
            chunks = [
                text_content[i:i + chunk_size]
                for i in range(0, len(text_content), chunk_size)
            ]
            # Add documents in batches to keep each add() call small.
            batch_size = 50
            for i in range(0, len(chunks), batch_size):
                batch = chunks[i:i + batch_size]
                batch_ids = [f"doc_{j}" for j in range(i, i + len(batch))]
                # Fall back to a generic label when the index file is shorter
                # than the number of chunks.
                batch_metadata = [{
                    "index": index_lines[j].strip() if j < len(index_lines) else f"Chunk {j+1}",
                    "chunk_number": j
                } for j in range(i, i + len(batch))]
                self.collection.add(
                    documents=batch,
                    ids=batch_ids,
                    metadatas=batch_metadata
                )
            self.initialized = True
            return True
        except Exception as e:
            print(f"Error initializing database: {str(e)}")
            return False

    def _search_database(self, query: str) -> List[Dict]:
        """Return up to 3 relevant chunks for *query*.

        Each result dict has 'content', 'metadata', and 'score' (cosine
        similarity, derived as 1 - distance). Returns [] on any error.
        """
        try:
            results = self.collection.query(
                query_texts=[query],
                n_results=3,
                include=["documents", "metadatas", "distances"]
            )
            return [
                {
                    "content": doc,
                    "metadata": meta,
                    # Collection uses cosine distance; convert to similarity.
                    "score": 1 - dist
                }
                for doc, meta, dist in zip(
                    results['documents'][0],
                    results['metadatas'][0],
                    results['distances'][0]
                )
            ]
        except Exception as e:
            print(f"Error searching database: {str(e)}")
            return []

    def chat(self, query: str, history) -> str:
        """Answer *query* using retrieved context; Gradio ChatInterface entry.

        *history* is supplied by Gradio but unused — the bot keeps its own
        server-side running history string. Returns the answer text or a
        user-facing error message (never raises).
        """
        try:
            # Lazily build the database on the first query.
            if not self.initialized and not self._initialize_database():
                return "Error: Unable to initialize the database. Please try again."
            # Retrieve the most relevant chunks.
            search_results = self._search_database(query)
            if not search_results:
                return "I apologize, but I cannot find information about that in the database."
            # Combine retrieved chunks, labelled with their section index.
            context = "\n\n".join([
                f"[Section {r['metadata']['index']}]\n{r['content']}"
                for r in search_results
            ])
            # Generate the response with the prompt | llm chain.
            chain = self.prompt | self.llm
            result = chain.invoke({
                "context": context,
                "chat_history": self.chat_history,
                "question": query
            })
            # Append to the running history, keeping only the most recent
            # tail so the prompt stays within the model's context window.
            self.chat_history += f"\nUser: {query}\nAI: {result}\n"
            if len(self.chat_history) > self._MAX_HISTORY_CHARS:
                self.chat_history = self.chat_history[-self._MAX_HISTORY_CHARS:]
            return result
        except Exception as e:
            return f"Error processing query: {str(e)}"
# Single chatbot instance shared by every Gradio session.
chatbot = LegalChatbot()

# Sample questions rendered as clickable prompts in the UI.
_example_questions = [
    "What is criminal conspiracy?",
    "What are the punishments for corruption?",
    "Explain the concept of culpable homicide",
    "What constitutes theft under the act?",
]

# Build the chat UI around LegalChatbot.chat.
iface = gr.ChatInterface(
    fn=chatbot.chat,
    title="Bharatiya Nyaya Sanhita, 2023 - Legal Assistant",
    description="Ask questions about the Bharatiya Nyaya Sanhita, 2023. The system will initialize on your first query.",
    examples=_example_questions,
    theme=gr.themes.Soft(),
)
# Script entry point: serve the UI locally, surfacing errors in the browser.
if __name__ == "__main__":
    iface.launch(share=False, show_error=True)