Poonawala committed
Commit b947dc9 · verified · 1 Parent(s): 6c9ef1b

Upload 4 files

Files changed (4)
  1. config.py +17 -0
  2. embeddings.py +30 -0
  3. llm_chain.py +79 -0
  4. req.txt +6 -0
config.py ADDED
@@ -0,0 +1,17 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_openai import OpenAIEmbeddings
+
+ import os
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ # Read the API key from the environment (populated by load_dotenv) instead of hardcoding it
+ os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "")
+ model_name = os.getenv("MODEL_NAME")
+ mini_model_name = os.getenv("MINI_MODEL_NAME")
+ embedding_model = os.getenv("EMBEDDING_MODEL")
+
+ llm = ChatOpenAI(model=model_name)
+ llm2 = ChatOpenAI(model=mini_model_name)
+
+ embeddings = OpenAIEmbeddings(model=embedding_model)
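config.py reads three model names from the environment; a minimal .env sketch that would satisfy it (the key and model identifiers below are placeholders, not part of this commit):

# .env — example values only; substitute your real key and model names
OPENAI_API_KEY=sk-...
MODEL_NAME=gpt-4o
MINI_MODEL_NAME=gpt-4o-mini
EMBEDDING_MODEL=text-embedding-3-small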
embeddings.py ADDED
@@ -0,0 +1,30 @@
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_community.vectorstores import FAISS
+ from config import embeddings
+
+ def create_vectoreDB(file_path: str):
+     try:
+         loader = PyMuPDFLoader(file_path=file_path)
+         documents = loader.load()
+
+         # Strip PDF extraction artifacts (stray newlines, divider rules); replacing single spaces would destroy the text, so only targeted patterns are removed
+         for doc in documents:
+             doc.page_content = doc.page_content.replace("\n \n", "").replace("----", "").replace("====", "")
+
+         vectorstore = FAISS.from_documents(
+             documents,
+             embedding=embeddings
+         )
+
+         path = f"vectors/{file_path}".replace(".pdf", "").replace("data/", "")
+         vectorstore.save_local(path)
+
+         print(f"Vector store has been created at: {path}")
+         return {"status": "completed"}
+
+     except Exception as e:
+         print(str(e))
+         return None
+
+
+ # create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")
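A minimal usage sketch for the loader above (the file name MyHandbook.pdf is hypothetical; any PDF under data/ works the same way):

from embeddings import create_vectoreDB

# Hypothetical input file; the FAISS index lands in vectors/MyHandbook
result = create_vectoreDB("data/MyHandbook.pdf")
print(result)  # {"status": "completed"} on success, None on failure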
llm_chain.py ADDED
@@ -0,0 +1,79 @@
+ from langchain_community.vectorstores import FAISS
+ from config import embeddings, llm, llm2
+ from pprint import pprint
+ from langchain.retrievers.document_compressors import FlashrankRerank
+ from langchain.retrievers import ContextualCompressionRetriever
+ from langchain_core.prompts import ChatPromptTemplate
+
+ compressor = FlashrankRerank()
+
+ def get_vectoreDB(file_path: str):
+
+     path = f"vectors/{file_path}".replace(".pdf", "").replace("data/", "")
+     vectoreStore = FAISS.load_local(
+         path,
+         embeddings,
+         allow_dangerous_deserialization=True
+     )
+
+     vectoreStore_as_retriever = vectoreStore.as_retriever(
+         search_type="similarity",
+         search_kwargs={
+             "k": 10,
+             "include_metadata": True
+         }
+     )
+
+     # Re-rank the top-k similarity hits with FlashRank before they reach the LLM
+     compression_retriever = ContextualCompressionRetriever(
+         base_compressor=compressor,
+         base_retriever=vectoreStore_as_retriever
+     )
+
+     return compression_retriever
+
+
+ system_prompt = """
+ You are a medical chatbot named Medi. Your task is to assist as a doctor.
+
+ Your tasks are given below:
+ - Answer the given question from the given context. If the question is outside the medical field, just tell the user "I don't know about it".
+ - If the question is related to the medical field but not relevant to the context, answer from your own knowledge.
+ - If the question is about the given context, explain it to the user using the most relevant context.
+ - Give the user a proper short answer; don't copy the whole context, explain it in your own words.
+ - If someone asks your name, tell them.
+
+ Remember to answer precisely and to the point, as a doctor would.
+
+ query: '''{query}'''
+ """
+
+ PROMPT = ChatPromptTemplate.from_messages(
+     [
+         ("system", system_prompt),
+         ("human", "context: '''{context}'''")
+     ]
+ )
+
+ llm_chain = PROMPT | llm
+ mini_llm_chain = PROMPT | llm2
+
+ vectoreDB = get_vectoreDB("data/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf")
+
+ def ask_ai(query: str, model_name: str):
+     docs = vectoreDB.invoke(query)
+     context_text = "\n\n---\n\n".join([doc.page_content for doc in docs])
+
+     # Pick the chain for the requested model; use a local name so the imported llm is not shadowed
+     if model_name == "4o":
+         chain = llm_chain
+     elif model_name == "4o-mini":
+         chain = mini_llm_chain
+     else:
+         return "No model found"
+
+     response = chain.invoke({
+         "context": context_text,
+         "query": query
+     })
+     return response.content
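A minimal usage sketch, assuming the GALE encyclopedia index was already built with create_vectoreDB (the question is a placeholder; "4o" and "4o-mini" are the only model keys ask_ai accepts):

from llm_chain import ask_ai

# Placeholder question; importing llm_chain loads the FAISS index from disk
answer = ask_ai("What are the common symptoms of anemia?", "4o")
print(answer)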
req.txt ADDED
@@ -0,0 +1,6 @@
+ langchain-openai==0.2.11
+ python-dotenv==1.0.1
+ langchain-community==0.3.5
+ pymupdf==1.24.9
+ faiss-cpu==1.8.0
+ flashrank==0.2.5
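Assuming a standard Python environment, these pins install with:

pip install -r req.txt

flashrank is pinned because FlashrankRerank in llm_chain.py needs it at runtime.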