Poonawala committed
Commit b947dc9 · verified · 1 Parent(s): 6c9ef1b

Upload 4 files

Files changed (4)
  1. config.py +17 -0
  2. embeddings.py +30 -0
  3. llm_chain.py +79 -0
  4. req.txt +6 -0
config.py ADDED
@@ -0,0 +1,17 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_openai import OpenAIEmbeddings
+
+ import os
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ # Read the API key from the environment (populated by load_dotenv) instead of hardcoding it
+ os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "")
+ model_name = os.getenv("MODEL_NAME")
+ mini_model_name = os.getenv("MINI_MODEL_NAME")
+ embedding_model = os.getenv("EMBEDDING_MODEL")
+
+ llm = ChatOpenAI(model=model_name)
+ llm2 = ChatOpenAI(model=mini_model_name)
+
+ embeddings = OpenAIEmbeddings(model=embedding_model)
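config.py reads three model names from the environment; a minimal .env sketch that would satisfy it (the key and model identifiers below are placeholders, not part of this commit):

# .env — example values only; substitute your real key and model names
OPENAI_API_KEY=sk-...
MODEL_NAME=gpt-4o
MINI_MODEL_NAME=gpt-4o-mini
EMBEDDING_MODEL=text-embedding-3-small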
embeddings.py ADDED
@@ -0,0 +1,30 @@
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_community.vectorstores import FAISS
+ from config import embeddings
+
+ def create_vectoreDB(file_path: str):
+     try:
+         loader = PyMuPDFLoader(file_path=file_path)
+         documents = loader.load()
+
+         # Strip PDF extraction artifacts (stray newlines, divider rules); replacing single spaces would destroy the text, so only targeted patterns are removed
+         for doc in documents:
+             doc.page_content = doc.page_content.replace("\n \n", "").replace("----", "").replace("====", "")
+
+         vectorstore = FAISS.from_documents(
+             documents,
+             embedding=embeddings
+         )
+
+         path = f"vectors/{file_path}".replace(".pdf", "").replace("data/", "")
+         vectorstore.save_local(path)
+
+         print(f"Vector store has been created at: {path}")
+         return {"status": "completed"}
+
+     except Exception as e:
+         print(str(e))
+         return None
+
+
+ # create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")
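A minimal usage sketch for the loader above (the file name MyHandbook.pdf is hypothetical; any PDF under data/ works the same way):

from embeddings import create_vectoreDB

# Hypothetical input file; the FAISS index lands in vectors/MyHandbook
result = create_vectoreDB("data/MyHandbook.pdf")
print(result)  # {"status": "completed"} on success, None on failure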
llm_chain.py ADDED
@@ -0,0 +1,79 @@
+ from langchain_community.vectorstores import FAISS
+ from config import embeddings, llm, llm2
+ from pprint import pprint
+ from langchain.retrievers.document_compressors import FlashrankRerank
+ from langchain.retrievers import ContextualCompressionRetriever
+ from langchain_core.prompts import ChatPromptTemplate
+
+ compressor = FlashrankRerank()
+
+ def get_vectoreDB(file_path: str):
+
+     path = f"vectors/{file_path}".replace(".pdf", "").replace("data/", "")
+     vectoreStore = FAISS.load_local(
+         path,
+         embeddings,
+         allow_dangerous_deserialization=True
+     )
+
+     vectoreStore_as_retriever = vectoreStore.as_retriever(
+         search_type="similarity",
+         search_kwargs={
+             "k": 10,
+             "include_metadata": True
+         }
+     )
+
+     # Re-rank the top-k similarity hits with FlashRank before they reach the LLM
+     compression_retriever = ContextualCompressionRetriever(
+         base_compressor=compressor,
+         base_retriever=vectoreStore_as_retriever
+     )
+
+     return compression_retriever
+
+
+ system_prompt = """
+ You are a medical chatbot named Medi. Your task is to assist as a doctor.
+
+ Your tasks are given below:
+ - Answer the given question from the given context. If the question is outside the medical field, just tell the user "I don't know about it".
+ - If the question is related to the medical field but not relevant to the context, answer from your own knowledge.
+ - If the question is about the given context, explain it to the user using the most relevant context.
+ - Give the user a proper short answer; don't copy the whole context, explain it in your own words.
+ - If someone asks your name, tell them.
+
+ Remember to answer precisely and to the point, as a doctor would.
+
+ query: '''{query}'''
+ """
+
+ PROMPT = ChatPromptTemplate.from_messages(
+     [
+         ("system", system_prompt),
+         ("human", "context: '''{context}'''")
+     ]
+ )
+
+ llm_chain = PROMPT | llm
+ mini_llm_chain = PROMPT | llm2
+
+ vectoreDB = get_vectoreDB("data/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf")
+
+ def ask_ai(query: str, model_name: str):
+     docs = vectoreDB.invoke(query)
+     context_text = "\n\n---\n\n".join([doc.page_content for doc in docs])
+
+     # Pick the chain for the requested model; use a local name so the imported llm is not shadowed
+     if model_name == "4o":
+         chain = llm_chain
+     elif model_name == "4o-mini":
+         chain = mini_llm_chain
+     else:
+         return "No model found"
+
+     response = chain.invoke({
+         "context": context_text,
+         "query": query
+     })
+     return response.content
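A minimal usage sketch, assuming the GALE encyclopedia index was already built with create_vectoreDB (the question is a placeholder; "4o" and "4o-mini" are the only model keys ask_ai accepts):

from llm_chain import ask_ai

# Placeholder question; importing llm_chain loads the FAISS index from disk
answer = ask_ai("What are the common symptoms of anemia?", "4o")
print(answer)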
req.txt ADDED
@@ -0,0 +1,6 @@
+ langchain-openai==0.2.11
+ python-dotenv==1.0.1
+ langchain-community==0.3.5
+ pymupdf==1.24.9
+ faiss-cpu==1.8.0
+ flashrank==0.2.5
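Assuming a standard Python environment, these pins install with:

pip install -r req.txt

flashrank is pinned because FlashrankRerank in llm_chain.py needs it at runtime.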