Upload 4 files
Browse files- config.py +17 -0
- embeddings.py +30 -0
- llm_chain.py +79 -0
- req.txt +6 -0
config.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Central configuration: loads env vars and builds shared LLM / embedding clients."""

import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Pull variables from a local .env file into the process environment.
load_dotenv()

# BUG FIX: the previous code did `os.environ["OPENAI_API_KEY"] = ""`,
# unconditionally clobbering any key that load_dotenv() had just provided,
# which broke authentication. Only (re)set the key when one is available.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key:
    os.environ["OPENAI_API_KEY"] = _api_key

# Model identifiers come from the environment; None here will surface as an
# error when the clients below are constructed — configure .env accordingly.
model_name = os.getenv("MODEL_NAME")
mini_model_name = os.getenv("MINI_MODEL_NAME")
embedding_model = os.getenv("EMBEDDING_MODEL")

# Shared chat models: the primary model and a smaller/cheaper variant.
llm = ChatOpenAI(model=model_name)
llm2 = ChatOpenAI(model=mini_model_name)

# Shared embedding client used by the vector-store modules.
embeddings = OpenAIEmbeddings(model=embedding_model)
|
embeddings.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
2 |
+
from langchain_community.vectorstores import FAISS
|
3 |
+
from config import embeddings
|
4 |
+
|
5 |
+
def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it under vectors/.

    Args:
        file_path: Path to the source PDF, e.g. "data/foo.pdf".

    Returns:
        {"status": "completed"} on success, or None if anything failed.
    """
    import re  # local import: used only for the whitespace cleanup below

    try:
        loader = PyMuPDFLoader(file_path=file_path)
        documents = loader.load()

        # Clean PDF-extraction artifacts (stray newlines, rule lines).
        # BUG FIX: the old code used .replace(" ", ""), which deleted EVERY
        # space and glued all words together, ruining the text for embedding.
        # Collapse runs of spaces to a single space instead.
        for doc in documents:
            cleaned = (
                doc.page_content
                .replace("\n \n", "")
                .replace("----", "")
                .replace("====", "")
            )
            doc.page_content = re.sub(r" {2,}", " ", cleaned)

        vectorstore = FAISS.from_documents(
            documents,
            embedding=embeddings,
        )

        # "data/foo.pdf" -> "vectors/foo"
        path = f"vectors/{file_path}".replace(".pdf", "").replace("data/", "")
        vectorstore.save_local(path)

        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}

    except Exception as e:
        # Best-effort: report the failure and signal it with None rather
        # than crashing the caller.
        print(str(e))
        return None
|
28 |
+
|
29 |
+
|
30 |
+
# create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")
|
llm_chain.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.vectorstores import FAISS
|
2 |
+
from config import embeddings, llm, llm2
|
3 |
+
from pprint import pprint
|
4 |
+
from langchain.retrievers.document_compressors import FlashrankRerank
|
5 |
+
from langchain.retrievers import ContextualCompressionRetriever
|
6 |
+
from langchain_core.prompts import ChatPromptTemplate
|
7 |
+
|
8 |
+
# Single FlashRank reranker instance, shared by every retriever built in
# this module (see get_vectoreDB).
compressor = FlashrankRerank()
|
9 |
+
|
10 |
+
def get_vectoreDB(file_path:str):
    """Load the saved FAISS index for *file_path* and wrap it in a
    reranking retriever.

    The on-disk location mirrors create_vectoreDB:
    "data/foo.pdf" -> "vectors/foo".
    """
    # Derive the index directory from the original PDF path.
    index_path = f"vectors/{file_path}".replace(".pdf", "").replace("data/", "")

    store = FAISS.load_local(
        index_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # Fetch a generous candidate set; the reranker trims it afterwards.
    base_retriever = store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10, "include_metadata": True},
    )

    # Rerank candidates with the shared FlashRank compressor before they
    # reach the LLM.
    return ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever,
    )
|
34 |
+
|
35 |
+
|
36 |
+
# System prompt defining the assistant persona ("Medi") and answer policy.
# FIX: corrected typos ("feild" -> "field", "relevent" -> "relevant") and
# completed the truncated "If someone ask your name" instruction — prompt
# quality directly affects model compliance.
system_prompt = """
You are a Medical Chatbot named as Medi. Your task is to assist as a doctor.

Your tasks are given below:
- Answer the given question from the given context. If the question is out of context from the medical field, just tell the user "I don't know about it".
- If the question is related to the medical field and not relevant to the context, then generate the answer by yourself.
- If the question is about the given context, explain it to the user according to the most relevant context.
- Give the user a proper short answer; don't copy the whole context, explain it yourself.
- If someone asks your name, tell them it is Medi.

Remember to answer in a precise way and to the point as a doctor.

query: '''{query}'''
"""

# The user's query rides in the system message; the retrieved context is
# supplied as the human message.
PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "context: '''{context}'''"),
    ]
)

# One chain per model tier (llm / llm2 come from config.py).
llm_chain = PROMPT | llm
mini_llm_chain = PROMPT | llm2

# Retriever over the pre-built medical-encyclopedia index (created once by
# embeddings.create_vectoreDB).
vectoreDB = get_vectoreDB("data/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf")
|
62 |
+
|
63 |
+
def ask_ai(query: str, model_name: str):
    """Answer *query* using retrieved context and the requested model.

    Args:
        query: The user's question.
        model_name: "4o" for the full model, "4o-mini" for the small one.

    Returns:
        The model's answer text, or the string "No model found" when
        model_name is unrecognized.
    """
    # Retrieve and rerank the most relevant chunks for this query.
    docs = vectoreDB.invoke(query)
    context_text = "\n\n---\n\n".join(doc.page_content for doc in docs)

    # FIX: the original local variable was named `llm`, shadowing the `llm`
    # imported from config — renamed and switched to dict dispatch.
    chains = {"4o": llm_chain, "4o-mini": mini_llm_chain}
    chain = chains.get(model_name)
    if chain is None:
        return "No model found"

    response = chain.invoke({
        "context": context_text,
        "query": query,
    })
    return response.content
|
req.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain-openai==0.2.11
|
2 |
+
python-dotenv==1.0.1
|
3 |
+
langchain-community==0.3.5
|
4 |
+
pymupdf==1.24.9
|
5 |
+
faiss-cpu==1.8.0
|
6 |
+
flashrank==0.2.5
|