# Hugging Face Spaces page header (status: Running)
import streamlit as st | |
import os | |
import time | |
import zipfile | |
def extract_pdf_data(zip_path='pdf_data.zip', target_dir='./pdf_data', extract_to='.'):
    """Unpack the bundled PDF archive unless the PDF folder already exists.

    Args:
        zip_path: Path of the zip archive to unpack.
        target_dir: Directory whose presence means the archive has already
            been unpacked; nothing is done when it exists.
        extract_to: Directory the archive members are extracted into.
            NOTE(review): the archive presumably contains the ``pdf_data``
            folder itself, so extracting into ``extract_to`` creates
            ``target_dir`` -- confirm against the shipped zip.

    Raises:
        FileNotFoundError: If ``target_dir`` is missing and ``zip_path``
            does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # A plain file that happens to be named like the folder is not a usable
    # PDF directory, so test for a directory rather than mere existence.
    if os.path.isdir(target_dir):
        return
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
# Unpack the bundled PDFs up front (no-op when ./pdf_data is already there)
# so the folder exists before the document loader reads from it.
extract_pdf_data()
from langchain_groq import ChatGroq | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain.chains import create_retrieval_chain | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.document_loaders import PyPDFDirectoryLoader | |
from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
from dotenv import load_dotenv | |
import os | |
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

## Load the Groq and Google API keys
groq_api_key = os.getenv('GROQ_API_KEY')
google_api_key = os.getenv("GOOGLE_API_KEY")

st.title("BHARATIYA NAGARIK SURAKSHA SANHITA(BNSS) LLM")

# Assigning None to os.environ raises an opaque TypeError, so report missing
# keys on the page and halt this script run instead.
missing_keys = [
    name
    for name, value in (("GROQ_API_KEY", groq_api_key), ("GOOGLE_API_KEY", google_api_key))
    if not value
]
if missing_keys:
    st.error("Missing required environment variable(s): " + ", ".join(missing_keys))
    st.stop()

# Re-export the Google key through the environment, as the original script did.
os.environ["GOOGLE_API_KEY"] = google_api_key

# Groq-hosted chat model that answers the questions.
llm = ChatGroq(groq_api_key=groq_api_key,
               model_name="Llama3-8b-8192")

# Prompt with two placeholders: {context} (retrieved chunks) and {input}
# (the user's question). The context block is now closed with </context>;
# the original repeated the opening tag.
prompt = ChatPromptTemplate.from_template(
"""
1. Check BHARATIYA NAGARIK SURAKSHA SANHITA Section: First, analyze the given input and determine which section of the BHARATIYA NAGARIK SURAKSHA SANHITA (BNSS) it fits into. Check input properly because some words may not match with BHARATIYA NAGARIK SURAKSHA SANHITA terminology, so you still need to understand the context.
2. Register FIR: After determining the appropriate BNSS section, confirm the registration of the First Information Report (FIR) under that section.
3. Outline Procedure: Provide a brief outline of the procedure that will follow after the FIR registration, based on the identified BNSS section.
4. Punishment: Specify the punishment associated with the identified BNSS section.
5. Additional Recommendations: Suggest any specific actions or precautions that the police should take based on the nature of the case.
In final response Don't use IPC or indian penal code always use BNSS, BHARATIYA NAGARIK SURAKSHA SANHITA and you will only provide in details: {input}, Crime input, FIR Register under which section, Outline Procedure, Punishment, and Additional Recommendations.
<context>
{context}
</context>
Questions: {input}
"""
)
def vector_embedding():
    """Build the FAISS index over the PDF folder once per Streamlit session.

    The embedding client, loader, loaded documents, splitter, chunks and the
    resulting vector store are all stored on ``st.session_state``. When
    ``vectors`` is already present, only the status line is written and
    nothing is rebuilt.
    """
    st.write("Please wait, the LLM is getting ready!")

    state = st.session_state
    if "vectors" in state:
        return

    # Google Generative AI embedding model used to vectorise the chunks.
    state.embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

    # Data ingestion: read every PDF found under ./pdf_data.
    state.loader = PyPDFDirectoryLoader("./pdf_data")
    state.docs = state.loader.load()

    # Chunking: 1000-character pieces overlapping by 200 characters.
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )

    # Only the first 20 loaded documents are split and indexed.
    state.final_documents = state.text_splitter.split_documents(state.docs[:20])

    # FAISS vector store built from the chunks with the Google embeddings.
    state.vectors = FAISS.from_documents(state.final_documents, state.embeddings)
# Build (or reuse) the vector index before accepting questions.
vector_embedding()
st.write("The LLM is ready. You can use it now!")

# Question form: the text box and submit button are grouped in one form.
with st.form(key='input_form'):
    prompt1 = st.text_input("Please Enter Your Question.")
    submit_button = st.form_submit_button(label='Submit')

if submit_button and prompt1:
    # Retrieval chain: fetch chunks from the index, then pass them to the
    # LLM together with the prompt template.
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # The call is dominated by network waits, so measure wall-clock time;
    # time.process_time() counts only CPU time and under-reports the latency.
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': prompt1})
    print("Response time :", time.perf_counter() - start)

    st.write(response['answer'])

    # Show the retrieved chunks that were passed to the model as context.
    with st.expander("Document Similarity Search"):
        for doc in response["context"]:
            st.write(doc.page_content)
            st.write("--------------------------------")