Spaces:

veerukhannan
/

advisor

Sleeping

App Files Community

veerukhannan committed on Nov 23, 2024

Commit

2e7f1cb

verified ·

1 Parent(s): 5a36c01

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -19

app.py CHANGED Viewed

@@ -8,13 +8,26 @@ import re
 from sentence_transformers import SentenceTransformer
 from loguru import logger
 class LegalAssistant:
     def __init__(self):
         # Initialize ChromaDB
         self.chroma_client = chromadb.Client()
         self.collection = self.chroma_client.get_or_create_collection(
             name="legal_documents",
-            embedding_function=SentenceTransformer('all-MiniLM-L6-v2')
         )
         # Load documents if collection is empty
@@ -110,16 +123,16 @@ ERROR HANDLING:
                 })
             # Add to ChromaDB
-            for i, section in enumerate(sections):
-                self.collection.add(
-                    documents=[section["content"]],
-                    metadatas=[{
-                        "title": section["title"],
-                        "source": "a2023-45.txt",
-                        "section_number": i + 1
-                    }],
-                    ids=[f"section_{i+1}"]
-                )
             logger.info(f"Loaded {len(sections)} sections into ChromaDB")
@@ -130,9 +143,9 @@ ERROR HANDLING:
     def validate_query(self, query: str) -> tuple[bool, str]:
         """Validate the input query"""
         if not query or len(query.strip()) < 10:
-            return False, "Query too short. Please provide more details."
         if len(query) > 500:
-            return False, "Query too long. Please be more concise."
         if not re.search(r'[?.]$', query):
             return False, "Query must end with a question mark or period."
         return True, ""
@@ -142,8 +155,7 @@ ERROR HANDLING:
         try:
             results = self.collection.query(
                 query_texts=[query],
-                n_results=3,
-                include=["metadatas", "documents"]
             )
             if results and results['documents']:
@@ -226,7 +238,7 @@ Remember: ONLY use information from the above context. If the information is not
                     # Validate that references only contain sections from sources
                     valid_references = [ref for ref in result.get("reference_sections", [])
-                                     if any(source in ref for source in sources)]
                     # If references mention unauthorized sources, return error
                     if len(valid_references) != len(result.get("reference_sections", [])):
@@ -289,9 +301,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # Indian Legal Assistant
     ## Guidelines for Queries:
     1. Be specific and clear in your questions
-    2. End questions with a question mark
-    3. Provide relevant context if available
-    4. Keep queries between 10-500 characters
     """)
     with gr.Row():
@@ -320,6 +332,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     - Responses are based ONLY on the provided document
     - No external legal knowledge is used
     - All references are from the document itself
     """)
     submit_btn.click(

 from sentence_transformers import SentenceTransformer
 from loguru import logger
+class SentenceTransformerEmbeddings:
+    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
+        self.model = SentenceTransformer(model_name)
+    def __call__(self, input: List[str]) -> List[List[float]]:
+        embeddings = self.model.encode(input)
+        return embeddings.tolist()
 class LegalAssistant:
     def __init__(self):
         # Initialize ChromaDB
         self.chroma_client = chromadb.Client()
+        # Initialize embedding function
+        self.embedding_function = SentenceTransformerEmbeddings()
+        # Create or get collection with proper embedding function
         self.collection = self.chroma_client.get_or_create_collection(
             name="legal_documents",
+            embedding_function=self.embedding_function
         )
         # Load documents if collection is empty
                 })
             # Add to ChromaDB
+            documents = [section["content"] for section in sections]
+            metadatas = [{"title": section["title"], "source": "a2023-45.txt", "section_number": i + 1}
+                        for i, section in enumerate(sections)]
+            ids = [f"section_{i+1}" for i in range(len(sections))]
+            self.collection.add(
+                documents=documents,
+                metadatas=metadatas,
+                ids=ids
+            )
             logger.info(f"Loaded {len(sections)} sections into ChromaDB")
     def validate_query(self, query: str) -> tuple[bool, str]:
         """Validate the input query"""
         if not query or len(query.strip()) < 10:
+            return False, "Query too short. Please provide more details (minimum 10 characters)."
         if len(query) > 500:
+            return False, "Query too long. Please be more concise (maximum 500 characters)."
         if not re.search(r'[?.]$', query):
             return False, "Query must end with a question mark or period."
         return True, ""
         try:
             results = self.collection.query(
                 query_texts=[query],
+                n_results=3
             )
             if results and results['documents']:
                     # Validate that references only contain sections from sources
                     valid_references = [ref for ref in result.get("reference_sections", [])
+                                     if any(source.split(" (Section")[0] in ref for source in sources)]
                     # If references mention unauthorized sources, return error
                     if len(valid_references) != len(result.get("reference_sections", [])):
     # Indian Legal Assistant
     ## Guidelines for Queries:
     1. Be specific and clear in your questions
+    2. End questions with a question mark or period
+    3. Keep queries between 10-500 characters
+    4. Questions will be answered based ONLY on the provided legal document
     """)
     with gr.Row():
     - Responses are based ONLY on the provided document
     - No external legal knowledge is used
     - All references are from the document itself
+    - Confidence levels indicate how well the answer matches the document content
     """)
     submit_btn.click(