Update app.py

app.py CHANGED
@@ -1,25 +1,123 @@
-from fastapi import FastAPI
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+# import gradio as gr
+# from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+import numpy as np
+from langchain_ollama import OllamaLLM
+from langchain_huggingface import HuggingFaceEmbeddings
+# from langchain_community.llms import HuggingFacePipeline
+from load_document import load_data
+from split_document import split_docs
+from embed_docs import embed_docs
+from retrieve import retrieve
+from datetime import datetime
+# from js import js
+# from theme import theme
+import os
+import glob
+from fastapi import FastAPI, Query, Request
 from pydantic import BaseModel
-
-from langchain.callbacks.manager import CallbackManager
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+import uvicorn
 
-app = FastAPI()
-MODEL_NAME = 'tinyllama'
 
-
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)
-
-
+app = FastAPI(title="Know The Law", description="A FastAPI application for legal assistance using AI.")
+
+vector_store_path = "/home/user/VectorStoreDB"
+index_name = "faiss_index"
+full_index_path = os.path.join(vector_store_path, index_name)
+
+# Create the embedder with a specific model
+embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+# Initialize our speech pipeline
+# transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", device="cpu")
+
+def fetch_doc():
+    # Adjust the path as needed, e.g., './' for the current directory
+    pdf_files = glob.glob("Document/*.pdf")
+    return pdf_files
+
+# Define the LLM
+hf_token = (os.environ.get("HF_TOKEN") or "").strip()  # Set your Hugging Face token in the HF_TOKEN environment variable
+llm = OllamaLLM(model="mistral:7b-instruct")
+
+pdf_files = fetch_doc()  # Fetch the dataset
+chunks = None
+loaded_docs = []
+# Build the index only if it does not exist yet; otherwise just query the saved one
+if not os.path.exists(full_index_path):
+    for doc in pdf_files:
+        print(f"Loading.....{doc}")
+        docs = load_data(doc)  # Load the dataset
+        loaded_docs.append(docs)
+    final_docs = [item for sublist in loaded_docs for item in sublist]  # Flatten the list
+    chunks = split_docs(final_docs, embedder=embedder)  # Split the documents
+saved_vector = embed_docs(chunks, embedder=embedder)  # Embed the documents, or load the saved index when chunks is None
+retrieved = retrieve(saved_vector)  # Build a retriever over similar docs
+
+# Define the prompt template
+prompt = """
+You are The Law Assistant, an AI trained to help Nigerians understand their legal rights and obligations. Using the provided context below, answer user questions related to Nigerian law.
+
+Instructions:
+
+1. Base your responses strictly on the given context or verified legal sources.
+
+2. If the answer is not in the context and you're unsure, respond with: "I don't know based on the available information." Do not fabricate or speculate.
+
+3. Keep your answers clear, concise, and jargon-free.
+
+4. Always cite the legal source(s) or reference(s) you used (e.g., constitution section, legal act, court ruling).
+
+Context: {context}
+Question: {input}
+
+Helpful Answer:"""
+
+QA_CHAIN_PROMPT = PromptTemplate.from_template(template=prompt)
+
+# Create the document prompt
+document_prompt = PromptTemplate(
+    input_variables=["page_content", "source"],
+    template="Context:\ncontent:{page_content}\nsource:{source}",
+)
+
+# Create the stuff-documents chain
+combine_docs_chain = create_stuff_documents_chain(
+    llm,
+    QA_CHAIN_PROMPT,
+    document_prompt=document_prompt
+)
+
+# Create the retrieval chain
+qa_chain = create_retrieval_chain(
+    retriever=retrieved,
+    combine_docs_chain=combine_docs_chain
+)
+
+class QueryRequest(BaseModel):
+    question: str
 
 @app.get("/")
-def
-    return {"message":
-
-@app.post("/
-def
-
-
-
+def home():
+    return {"message": "Welcome to the Know The Law API. Use POST /query to ask legal questions."}
+
+@app.post("/query")
+def respond(query: QueryRequest):
+    # Invoke the chain with the question
+    question = query.question
+    result = qa_chain.invoke({"input": question})
+
+    # Return the answer
+    return {"answer": result["answer"]}
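
The four local modules imported at the top (`load_document`, `split_document`, `embed_docs`, `retrieve`) are not part of this commit, so their implementations are not visible here. The sketch below is a guess at their shape, assuming FAISS as the vector store (suggested by `index_name = "faiss_index"`) and `PyPDFLoader` for the PDFs; every function body is an assumption, not the repository's actual code:

```python
# Hypothetical sketches of the helper modules used by app.py.
# Assumes FAISS and PyPDFLoader; the real modules may differ.
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

def load_data(path):
    # Load one PDF into a list of Documents (one per page)
    return PyPDFLoader(path).load()

def split_docs(docs, embedder=None):
    # Chunk the documents; the embedder argument mirrors app.py's call signature
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return splitter.split_documents(docs)

def embed_docs(chunks, embedder=None):
    # Build the FAISS index from chunks, or load the saved one when chunks is None
    index_path = "/home/user/VectorStoreDB/faiss_index"  # must match app.py's full_index_path
    if chunks is None:
        return FAISS.load_local(index_path, embedder, allow_dangerous_deserialization=True)
    store = FAISS.from_documents(chunks, embedder)
    store.save_local(index_path)
    return store

def retrieve(vector_store):
    # Expose the store as a retriever returning the top-k most similar chunks
    return vector_store.as_retriever(search_kwargs={"k": 4})
```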
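One subtlety in the chain wiring: `create_retrieval_chain` feeds the `input` key of the invoke payload to the retriever and injects the formatted documents as `{context}`, which is why the prompt uses `{context}` and `{input}` rather than a `question` placeholder. The `document_prompt` also references `{source}`, so each retrieved `Document` needs a `source` key in its metadata (PDF loaders normally set it to the file path). A quick sanity check with an illustrative document:

```python
# Check that a document carries the metadata the document_prompt expects.
# The Document below is illustrative, not taken from the actual corpus.
from langchain_core.documents import Document

doc = Document(
    page_content="Section 39 guarantees freedom of expression.",
    metadata={"source": "Document/constitution.pdf"},
)
print(document_prompt.format(page_content=doc.page_content, source=doc.metadata["source"]))
# Context:
# content:Section 39 guarantees freedom of expression.
# source:Document/constitution.pdf
```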
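`uvicorn` is imported but never started in the file, so the Space presumably launches the server externally. If a local entry point is wanted, a conventional one would look like this (port 7860, the usual Hugging Face Spaces port, is an assumption):

```python
# Optional local entry point; the port is an assumption, not from the commit.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
```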
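Once the server is up, the `/query` endpoint takes a JSON body matching `QueryRequest` and returns the chain's answer; the URL and question below are placeholders:

```python
# Hypothetical client call against a locally running instance.
import requests

resp = requests.post(
    "http://localhost:7860/query",  # placeholder; use the Space's public URL in practice
    json={"question": "What does the Nigerian constitution say about freedom of expression?"},
    timeout=120,  # local LLM inference can be slow
)
print(resp.json()["answer"])
```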