giridharnair01 committed · verified
Commit 3407303 · 1 Parent(s): 434fd65

Create app.py

Files changed (1): app.py (+42 −0)
app.py ADDED
@@ -0,0 +1,42 @@
import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import json

# Path to the JSON dataset; a relative path is assumed here, since the
# original "/mnt/data/..." sandbox path would not exist on a Space.
dataset_path = "constitution_qa.json"

# Load the JSON dataset
with open(dataset_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Flatten each Q&A record into a single retrievable text chunk
texts = [f"Q: {item['question']}\nA: {item['answer']}" for item in data]


@st.cache_resource
def build_qa_chain():
    # Cache the heavy resources so they are built once, not on every Streamlit rerun.
    # Load the embedding model
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Create the FAISS vector database over the Q&A texts
    vector_db = FAISS.from_texts(texts, embeddings)

    # Load the open-source LLM (Llama-2 7B chat weights)
    tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
    model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
    text_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)
    llm = HuggingFacePipeline(pipeline=text_pipeline)

    # Wire retriever and LLM into a RetrievalQA (RAG) chain
    return RetrievalQA.from_chain_type(llm=llm, retriever=vector_db.as_retriever())


qa_chain = build_qa_chain()

# Streamlit UI
st.title("Indian Constitution Q&A RAG App")
query = st.text_input("Enter your legal query:")

if query:
    response = qa_chain.run(query)
    st.write("### AI-Generated Answer:")
    st.write(response)
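
For reference, the loader above assumes constitution_qa.json is a JSON array of objects with question and answer keys (inferred from the list comprehension in the code). A minimal Python sketch of that shape, using hypothetical placeholder records rather than the real dataset:

import json

# Hypothetical placeholder records illustrating the expected schema;
# the actual dataset ships with the Space and is not reproduced here.
sample = [
    {
        "question": "What does Article 14 of the Indian Constitution guarantee?",
        "answer": "Equality before the law and equal protection of the laws.",
    },
]

# Write the sample to the path the app reads from.
with open("constitution_qa.json", "w", encoding="utf-8") as f:
    json.dump(sample, f, ensure_ascii=False, indent=2)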
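To try the app locally, installing streamlit, langchain, langchain-community, faiss-cpu, transformers, sentence-transformers and torch and then running `streamlit run app.py` should suffice. Note that the 7B chat model is heavy to load; on CPU-only hardware a smaller instruct model may be a more practical substitute.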