# India_Law_LLM / app.py
# Dezzex2's picture
# Update app.py
# 8fb46dd verified
# raw
# history blame
# 4.02 kB
import streamlit as st
import os
import time
import zipfile
def extract_pdf_data(archive_path='pdf_data.zip', target_dir='./pdf_data', extract_to='.'):
    """Unpack the bundled PDF archive unless its output folder already exists.

    Args:
        archive_path: Zip file to unpack.
        target_dir: Path whose existence means the archive was already
            unpacked; when it exists the function does nothing.
        extract_to: Directory the archive members are extracted into.

    Returns:
        None. The call site is a bare top-level statement in a Streamlit
        script, so nothing is returned to keep the page output unchanged.

    Raises:
        FileNotFoundError: If ``target_dir`` is absent and ``archive_path``
            does not exist.
    """
    # Skip the (comparatively slow) extraction on every Streamlit rerun
    # once the data is in place.
    if os.path.exists(target_dir):
        return
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(extract_to)
# Unpack the bundled PDFs at start-up (a no-op when ./pdf_data already exists),
# so the folder is in place before the index over ./pdf_data is built below.
extract_pdf_data()
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import os
load_dotenv()

st.title("BHARATIYA NAGARIK SURAKSHA SANHITA(BNSS) LLM")

## Load the Groq and Google API keys from the environment
groq_api_key = os.getenv('GROQ_API_KEY')
google_api_key = os.getenv("GOOGLE_API_KEY")

# os.environ only accepts str values, so re-exporting a missing key would
# raise TypeError. Report the missing configuration on the page and halt
# the script run instead of crashing with a traceback.
missing_keys = [
    name
    for name, value in (
        ("GROQ_API_KEY", groq_api_key),
        ("GOOGLE_API_KEY", google_api_key),
    )
    if not value
]
if missing_keys:
    st.error(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
    )
    st.stop()

os.environ["GOOGLE_API_KEY"] = google_api_key

llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
# Instruction template sent to the LLM. It has two placeholders: {context}
# (filled by the document chain built from this prompt) and {input} (the
# user's question). The context section is wrapped in a properly closed
# <context>...</context> pair so it is clearly delimited from the question.
prompt = ChatPromptTemplate.from_template(
"""
1. Check BHARATIYA NAGARIK SURAKSHA SANHITA Section: First, analyze the given input and determine which section of the BHARATIYA NAGARIK SURAKSHA SANHITA (BNSS) it fits into. Check input properly because some words may not match with BHARATIYA NAGARIK SURAKSHA SANHITA terminology, so you still need to understand the context.
2. Register FIR: After determining the appropriate BNSS section, confirm the registration of the First Information Report (FIR) under that section.
3. Outline Procedure: Provide a brief outline of the procedure that will follow after the FIR registration, based on the identified BNSS section.
4. Punishment: Specify the punishment associated with the identified BNSS section.
5. Additional Recommendations: Suggest any specific actions or precautions that the police should take based on the nature of the case.
In final response Don't use IPC or indian penal code always use BNSS, BHARATIYA NAGARIK SURAKSHA SANHITA and you will only provide in details: {input}, Crime input, FIR Register under which section, Outline Procedure, Punishment, and Additional Recommendations.
<context>
{context}
</context>
Questions: {input}
"""
)
def vector_embedding(pdf_dir="./pdf_data", max_docs=20, chunk_size=1000, chunk_overlap=200):
    """Build the FAISS vector index once per Streamlit session.

    The intermediate objects (embeddings, loader, loaded documents, splitter,
    chunks) and the final index are stored on ``st.session_state`` under the
    keys ``embeddings``, ``loader``, ``docs``, ``text_splitter``,
    ``final_documents`` and ``vectors``. When ``vectors`` is already present
    the function returns immediately without writing anything to the page.

    Args:
        pdf_dir: Directory passed to ``PyPDFDirectoryLoader``.
        max_docs: Only the first ``max_docs`` loaded documents are split and
            indexed; pass ``None`` to index everything that was loaded.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to the same splitter.

    Returns:
        None.
    """
    state = st.session_state

    # The index survives Streamlit reruns in session_state; nothing to do
    # (and nothing to announce) once it has been built.
    if "vectors" in state:
        return

    st.write("Please wait, the LLM is getting ready!")

    state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    state.loader = PyPDFDirectoryLoader(pdf_dir)  ## Data Ingestion
    state.docs = state.loader.load()  ## Document Loading
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )  ## Chunk Creation
    # Slicing with None keeps every document, so max_docs=None lifts the cap.
    state.final_documents = state.text_splitter.split_documents(state.docs[:max_docs])

    # Fail with a readable message instead of handing FAISS an empty
    # document list when the folder contains no usable PDF text.
    if not state.final_documents:
        st.error(f"No PDF content could be loaded from {pdf_dir!r}.")
        st.stop()

    # Embed the chunks with the Google embedding model and index them in FAISS.
    state.vectors = FAISS.from_documents(state.final_documents, state.embeddings)
# Build (or reuse) the vector index before rendering the question form.
vector_embedding()
st.write("The LLM is ready. You can use it now!")

# A form batches the text input with its submit button, so the query is only
# processed when the user presses "Submit".
with st.form(key='input_form'):
    prompt1 = st.text_input("Please Enter Your Question.")
    submit_button = st.form_submit_button(label='Submit')

if submit_button and prompt1:
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # perf_counter() measures elapsed wall-clock time. process_time() counts
    # only this process's CPU time and therefore leaves out the time spent
    # waiting for the chain call to return.
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': prompt1})
    print("Response time :", time.perf_counter() - start)

    st.write(response['answer'])

    # With a streamlit expander
    with st.expander("Document Similarity Search"):
        # Show the chunks the chain returned under "context"
        for doc in response["context"]:
            st.write(doc.page_content)
            st.write("--------------------------------")