import streamlit as st from langchain_community.vectorstores import FAISS from langchain.embeddings import HuggingFaceEmbeddings from langchain.llms import HuggingFacePipeline from langchain.chains import RetrievalQA from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline import json # Streamlit UI st.title("Indian Constitution Q&A RAG App") # Upload JSON File uploaded_file = st.file_uploader("Upload Constitution JSON", type="json") if uploaded_file is not None: # Load JSON dataset data = json.load(uploaded_file) # Extract questions and answers texts = [f"Q: {item['question']}\nA: {item['answer']}" for item in data] # Load the embedding model embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") # Create FAISS vector database vector_db = FAISS.from_texts(texts, embeddings) # Load Open-Source LLM (LLaMA-2 7B Open Chat Model) tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf") model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf") text_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer) llm = HuggingFacePipeline(pipeline=text_pipeline) # Create RAG pipeline qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_db.as_retriever()) query = st.text_input("Enter your legal query:") if query: response = qa_chain.run(query) st.write("### AI-Generated Answer:") st.write(response)