AnkitPatil committed: Upload 5 files
Files changed:
- .gitattributes +1 -0
- DOC From Adv.pdf +3 -0
- README.md +4 -4
- app.py +153 -0
- requirements.txt +9 -0
- vector_embeddings.py +56 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+DOC[[:space:]]From[[:space:]]Adv.pdf filter=lfs diff=lfs merge=lfs -text
DOC From Adv.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be83c647a805649bf42a3587664975ad1df26d24517654aba3a21cd6141a7acc
+size 2246483
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title:
-emoji:
+title: Lexify
+emoji: 🏆
 colorFrom: purple
-colorTo:
+colorTo: purple
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.36.0
 app_file: app.py
 pinned: false
 ---
app.py ADDED
@@ -0,0 +1,153 @@
import os
import sys
import warnings

import streamlit as st
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

warnings.filterwarnings("ignore")

# chromadb needs SQLite >= 3.35; swap pysqlite3-binary in for the stdlib
# sqlite3 before chromadb is imported.
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# Load environment variables from .env file
load_dotenv()

data_directory = os.path.join(os.path.dirname(__file__), "data")

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Alternative: st.secrets["huggingface_api_token"] -- don't forget to add your Hugging Face token

# Load the vector store from disk
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embedding_model, persist_directory=data_directory)

# Initialize the Hugging Face Hub LLM
hf_hub_llm = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={"temperature": 1, "max_new_tokens": 1024},
)

prompt_template = """
You are an AI chatbot specializing in the domain of law,
focusing on the changes made by the Indian government on July 1, 2024, in the transition from the old Indian Penal Code (IPC) to the new Bharatiya Nyaya Sanhita (BNS), 2023.
Your task is to provide information about this transition.
Here are your specific instructions:

1. **Simple Definitions**: Provide a brief, easy-to-understand definition of the BNS law for the general public.
2. **Codes Comparison**: Share the sections and clauses for both the IPC and the BNS, highlighting the changes.
3. **Punishments and Revisions**: Detail the punishments, penalties, and any improvements or revisions made in the BNS law.
4. **Detailed Comparison**: Conduct a comprehensive comparison between the IPC and the BNS.
5. **Articles and Videos**: Include references to relevant articles and videos discussing the new BNS law from authoritative sources.

Ensure the information is accurate, concise, and accessible to users with varying levels of legal knowledge.

When the user greets you with 'hi', 'hello', or 'how are you', respond with a single engaging line.
Do not refer to yourself as a chatbot; call yourself Lexify.

Context:
{context}

Question: {question}
Answer:
"""

custom_prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

rag_chain = RetrievalQA.from_chain_type(
    llm=hf_hub_llm,
    chain_type="stuff",
    # search_kwargs={"k": 3} fetches the top 3 results (as_retriever has no top_k parameter)
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": custom_prompt},
)

def get_response(question):
    result = rag_chain.invoke({"query": question})
    response_text = result["result"]
    # The endpoint echoes the prompt, so keep only the text after "Answer:".
    answer_start = response_text.find("Answer:") + len("Answer:")
    answer = response_text[answer_start:].strip()
    return answer

# Streamlit app
# Remove whitespace from the top of the page and sidebar
st.markdown(
    """
    <style>
    .appview-container .main .block-container {{
        padding-top: {padding_top}rem;
        padding-bottom: {padding_bottom}rem;
    }}
    </style>""".format(
        padding_top=1, padding_bottom=1
    ),
    unsafe_allow_html=True,
)

st.markdown("""
<h3 style='text-align: left; color: black; padding-top: 35px; border-bottom: 3px solid red;'>
LexifyAI: Your Personal Law Assistant
</h3>""", unsafe_allow_html=True)

side_bar_message = """
Hi! 👋 I'm here to help you with your law queries. What would you like to know or explore?
\nHere are some areas you might be interested in:
1. **IPC Laws**
2. **BNS Laws**
3. **Comparing Both**
4. **And Many More** 🌞

Feel free to ask me anything about law and justice!
"""

with st.sidebar:
    st.title('🤖 LexifyAI')
    st.markdown(side_bar_message)

initial_message = """
Hi there! I'm your Law and Justice Bot 🤖
Here are some questions you might ask me:\n
⚖️ When was the BNS law made?\n
⚖️ What is the IPC?\n
⚖️ On which date was the BNS implemented in the country?\n
"""

# Store LLM-generated responses
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": initial_message}]

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": initial_message}]

st.button('Clear Chat', on_click=clear_chat_history)

# User-provided prompt
if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Hold on, I'm fetching the latest legal advice for you..."):
            response = get_response(prompt)
            placeholder = st.empty()
            placeholder.markdown(response)
    message = {"role": "assistant", "content": response}
    st.session_state.messages.append(message)
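A side note on get_response: with the HuggingFaceHub wrapper, the text-generation endpoint often returns the prompt together with the completion, which is why the function slices off everything before the "Answer:" label. A standalone illustration of that parsing, using a purely hypothetical raw response string:

# Hypothetical raw model output: the prompt is echoed before the completion.
raw = (
    "...instructions...\n"
    "Question: What replaced the IPC?\n"
    "Answer: The Bharatiya Nyaya Sanhita (BNS), effective July 1, 2024."
)
answer = raw[raw.find("Answer:") + len("Answer:"):].strip()
print(answer)  # -> "The Bharatiya Nyaya Sanhita (BNS), effective July 1, 2024."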
requirements.txt ADDED
@@ -0,0 +1,9 @@
chromadb==0.5.3
pysqlite3-binary
protobuf==3.20.*
streamlit==1.36.0
pypdf==4.2.0
langchain==0.2.5
langchain-community==0.2.5
langchain-huggingface==0.0.3
python-dotenv
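Two notes on these pins. First, pysqlite3-binary is listed because chromadb requires SQLite >= 3.35, which the default Python build on many Space images lacks; app.py swaps it in for the stdlib sqlite3 before chromadb is imported. Second, vector_embeddings.py imports langchain_chroma, which is not pinned above, so reproducing that script likely needs an extra `pip install langchain-chroma` (an assumption based on the imports, not part of this commit). A minimal sketch of the SQLite check:

# Sketch: confirm the sqlite3 module chromadb will see is new enough.
# Assumes pysqlite3-binary is installed, as pinned above.
import sys

__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

import sqlite3

print(sqlite3.sqlite_version)  # chromadb needs >= 3.35.0
assert tuple(map(int, sqlite3.sqlite_version.split("."))) >= (3, 35, 0)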
vector_embeddings.py ADDED
@@ -0,0 +1,56 @@
import os
from collections import OrderedDict

from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Load environment variables from .env file
load_dotenv()

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Load the PDF
loader = PyPDFLoader("Dataset.pdf")  # provide your PDF path here
documents = loader.load()

# Split the text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
texts = text_splitter.split_documents(documents)

# Initialize the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Convert texts to embeddings
try:
    embeddings = embedding_model.embed_documents([doc.page_content for doc in texts])
    print("Vector embeddings created successfully")
except Exception as e:
    print(f"Error creating vector embeddings: {e}")

# Initialize the Chroma vector store, persisted to the "data" directory
vector_store = Chroma(embedding_function=embedding_model, persist_directory="data")

# Add documents to the vector store
vector_store.add_documents(documents=texts)

# Validate the setup
try:
    # Test query to validate data retrieval
    test_query = "What are some popular items for winter?"
    results = vector_store.search(query=test_query, search_type='similarity')

    # Deduplicate results
    unique_results = OrderedDict()
    for doc in results:
        if doc.page_content not in unique_results:
            unique_results[doc.page_content] = doc

    # Convert unique results to a list and limit to the top 3
    final_results = list(unique_results.values())[:3]
    print(f"Unique query results: {final_results}")
except Exception as e:
    print(f"Error during test query: {e}")
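Once this script has populated the "data" directory, the persisted store can be reloaded elsewhere, which is essentially what app.py does at startup. A minimal sketch, assuming the same directory and embedding model as above:

# Sketch: reload the persisted Chroma store and run a retrieval query.
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = Chroma(embedding_function=embedding_model, persist_directory="data")

for doc in store.similarity_search("What is the Bharatiya Nyaya Sanhita?", k=3):
    print(doc.page_content[:200])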