AnkitPatil committed: Upload 5 files
Files changed:
- .gitattributes +1 -0
- DOC From Adv.pdf +3 -0
- README.md +4 -4
- app.py +153 -0
- requirements.txt +9 -0
- vector_embeddings.py +56 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+DOC[[:space:]]From[[:space:]]Adv.pdf filter=lfs diff=lfs merge=lfs -text
DOC From Adv.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be83c647a805649bf42a3587664975ad1df26d24517654aba3a21cd6141a7acc
+size 2246483
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title:
-emoji:
+title: Lexify
+emoji: 🏆
 colorFrom: purple
-colorTo:
+colorTo: purple
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.36.0
 app_file: app.py
 pinned: false
 ---
app.py ADDED
@@ -0,0 +1,153 @@
import os
import sys
import warnings

import streamlit as st
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

warnings.filterwarnings("ignore")

# chromadb needs SQLite >= 3.35; swap pysqlite3-binary in for the stdlib
# sqlite3 before chromadb is imported.
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# Load environment variables from .env file
load_dotenv()

data_directory = os.path.join(os.path.dirname(__file__), "data")

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Alternative: st.secrets["huggingface_api_token"] -- don't forget to add your Hugging Face token

# Load the vector store from disk
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embedding_model, persist_directory=data_directory)

# Initialize the Hugging Face Hub LLM
hf_hub_llm = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={"temperature": 1, "max_new_tokens": 1024},
)

prompt_template = """
You are an AI chatbot specializing in the domain of law,
focusing on the changes made by the Indian government on July 1, 2024, in the transition from the old Indian Penal Code (IPC) to the new Bharatiya Nyaya Sanhita (BNS), 2023.
Your task is to provide information about this transition.
Here are your specific instructions:

1. **Simple Definitions**: Provide a brief, easy-to-understand definition of the BNS law for the general public.
2. **Codes Comparison**: Share the sections and clauses for both the IPC and the BNS, highlighting the changes.
3. **Punishments and Revisions**: Detail the punishments, penalties, and any improvements or revisions made in the BNS law.
4. **Detailed Comparison**: Conduct a comprehensive comparison between the IPC and the BNS.
5. **Articles and Videos**: Include references to relevant articles and videos discussing the new BNS law from authoritative sources.

Ensure the information is accurate, concise, and accessible to users with varying levels of legal knowledge.

When the user greets you with 'hi', 'hello', or 'how are you', respond with a single engaging line.
Do not refer to yourself as a chatbot; call yourself Lexify.

Context:
{context}

Question: {question}
Answer:
"""

custom_prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

rag_chain = RetrievalQA.from_chain_type(
    llm=hf_hub_llm,
    chain_type="stuff",
    # search_kwargs={"k": 3} fetches the top 3 results (as_retriever has no top_k parameter)
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": custom_prompt},
)

def get_response(question):
    result = rag_chain.invoke({"query": question})
    response_text = result["result"]
    # The endpoint echoes the prompt, so keep only the text after "Answer:".
    answer_start = response_text.find("Answer:") + len("Answer:")
    answer = response_text[answer_start:].strip()
    return answer

# Streamlit app
# Remove whitespace from the top of the page and sidebar
st.markdown(
    """
    <style>
    .appview-container .main .block-container {{
        padding-top: {padding_top}rem;
        padding-bottom: {padding_bottom}rem;
    }}
    </style>""".format(
        padding_top=1, padding_bottom=1
    ),
    unsafe_allow_html=True,
)

st.markdown("""
<h3 style='text-align: left; color: black; padding-top: 35px; border-bottom: 3px solid red;'>
LexifyAI: Your Personal Law Assistant
</h3>""", unsafe_allow_html=True)

side_bar_message = """
Hi! 👋 I'm here to help you with your law queries. What would you like to know or explore?
\nHere are some areas you might be interested in:
1. **IPC Laws**
2. **BNS Laws**
3. **Comparing Both**
4. **And Many More** 🌞

Feel free to ask me anything about law and justice!
"""

with st.sidebar:
    st.title('🤖 LexifyAI')
    st.markdown(side_bar_message)

initial_message = """
Hi there! I'm your Law and Justice Bot 🤖
Here are some questions you might ask me:\n
⚖️ When was the BNS law made?\n
⚖️ What is the IPC?\n
⚖️ On which date was the BNS implemented in the country?\n
"""

# Store LLM-generated responses
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": initial_message}]

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": initial_message}]

st.button('Clear Chat', on_click=clear_chat_history)

# User-provided prompt
if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Hold on, I'm fetching the latest legal advice for you..."):
            response = get_response(prompt)
            placeholder = st.empty()
            placeholder.markdown(response)
    message = {"role": "assistant", "content": response}
    st.session_state.messages.append(message)
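A side note on get_response: with the HuggingFaceHub wrapper, the text-generation endpoint often returns the prompt together with the completion, which is why the function slices off everything before the "Answer:" label. A standalone illustration of that parsing, using a purely hypothetical raw response string:

# Hypothetical raw model output: the prompt is echoed before the completion.
raw = (
    "...instructions...\n"
    "Question: What replaced the IPC?\n"
    "Answer: The Bharatiya Nyaya Sanhita (BNS), effective July 1, 2024."
)
answer = raw[raw.find("Answer:") + len("Answer:"):].strip()
print(answer)  # -> "The Bharatiya Nyaya Sanhita (BNS), effective July 1, 2024."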
requirements.txt ADDED
@@ -0,0 +1,9 @@
chromadb==0.5.3
pysqlite3-binary
protobuf==3.20.*
streamlit==1.36.0
pypdf==4.2.0
langchain==0.2.5
langchain-community==0.2.5
langchain-huggingface==0.0.3
python-dotenv
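Two notes on these pins. First, pysqlite3-binary is listed because chromadb requires SQLite >= 3.35, which the default Python build on many Space images lacks; app.py swaps it in for the stdlib sqlite3 before chromadb is imported. Second, vector_embeddings.py imports langchain_chroma, which is not pinned above, so reproducing that script likely needs an extra `pip install langchain-chroma` (an assumption based on the imports, not part of this commit). A minimal sketch of the SQLite check:

# Sketch: confirm the sqlite3 module chromadb will see is new enough.
# Assumes pysqlite3-binary is installed, as pinned above.
import sys

__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

import sqlite3

print(sqlite3.sqlite_version)  # chromadb needs >= 3.35.0
assert tuple(map(int, sqlite3.sqlite_version.split("."))) >= (3, 35, 0)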
vector_embeddings.py ADDED
@@ -0,0 +1,56 @@
import os
from collections import OrderedDict

from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Load environment variables from .env file
load_dotenv()

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Load the PDF
loader = PyPDFLoader("Dataset.pdf")  # provide your PDF path here
documents = loader.load()

# Split the text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
texts = text_splitter.split_documents(documents)

# Initialize the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Convert texts to embeddings
try:
    embeddings = embedding_model.embed_documents([doc.page_content for doc in texts])
    print("Vector embeddings created successfully")
except Exception as e:
    print(f"Error creating vector embeddings: {e}")

# Initialize the Chroma vector store, persisted to the "data" directory
vector_store = Chroma(embedding_function=embedding_model, persist_directory="data")

# Add documents to the vector store
vector_store.add_documents(documents=texts)

# Validate the setup
try:
    # Test query to validate data retrieval
    test_query = "What are some popular items for winter?"
    results = vector_store.search(query=test_query, search_type='similarity')

    # Deduplicate results
    unique_results = OrderedDict()
    for doc in results:
        if doc.page_content not in unique_results:
            unique_results[doc.page_content] = doc

    # Convert unique results to a list and limit to the top 3
    final_results = list(unique_results.values())[:3]
    print(f"Unique query results: {final_results}")
except Exception as e:
    print(f"Error during test query: {e}")
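Once this script has populated the "data" directory, the persisted store can be reloaded elsewhere, which is essentially what app.py does at startup. A minimal sketch, assuming the same directory and embedding model as above:

# Sketch: reload the persisted Chroma store and run a retrieval query.
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = Chroma(embedding_function=embedding_model, persist_directory="data")

for doc in store.similarity_search("What is the Bharatiya Nyaya Sanhita?", k=3):
    print(doc.page_content[:200])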