d221 committed
Commit c13ddd0 · verified · 1 Parent(s): 0716ca7

Upload 9 files

Data/cancer_and_cure__a_critical_analysis.27.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcd5a00bfc0fdc26be5fac6be1e1e7a9a1a5fde56ad6238597fbf23168238e48
+ size 225771
Data/medical_oncology_handbook_june_2020_edition.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e434ce92bd5b21c9da017f476535359dd400858d78083c89a817bea0d445abd
+ size 818286
README.md CHANGED
@@ -1,11 +1,2 @@
- ---
- title: Qdrant Backend
- emoji: 👁
- colorFrom: indigo
- colorTo: yellow
- sdk: docker
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Medical RAG-using-Meditron-7B-LLM
+ Medical RAG QA App using Meditron 7B LLM, Qdrant Vector Database, and PubMedBERT Embedding Model.
app.py ADDED
@@ -0,0 +1,80 @@
+ from langchain import PromptTemplate
+ from langchain.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import SentenceTransformerEmbeddings
+ from fastapi import FastAPI, Request, Form, Response
+ from fastapi.responses import HTMLResponse
+ from fastapi.templating import Jinja2Templates
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.encoders import jsonable_encoder
+ from qdrant_client import QdrantClient
+ from langchain.vectorstores import Qdrant
+ import os
+ import json
+
+ app = FastAPI()
+ templates = Jinja2Templates(directory="templates")
+ app.mount("/static", StaticFiles(directory="static"), name="static")
+
+ local_llm = "joshnader/meditron-7b-Q4_K_M-GGUF"
+
+ config = {
+     'max_new_tokens': 512,
+     'context_length': 2048,
+     'repetition_penalty': 1.1,
+     'temperature': 0.1,
+     'top_k': 50,
+     'top_p': 0.9,
+     'stream': True,
+     'threads': max(1, (os.cpu_count() or 1) // 4)  # guard against os.cpu_count() returning None or < 4
+ }
+
+ llm = CTransformers(
+     model=local_llm,
+     model_type="llama",
+     **config
+ )
+
+ print("LLM Initialized....")
+
+ prompt_template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+ Context: {context}
+ Question: {question}
+
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """
+
+ embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
+
+ client = QdrantClient(
+     url=os.getenv("QDRANT_URL", "https://868005ec-814c-4a06-b5f5-f4051fdf2a5d.europe-west3-0.gcp.cloud.qdrant.io"),
+     api_key=os.getenv("QDRANT_API_KEY"),
+     prefer_grpc=False
+ )
+
+ db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
+
+ prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
+
+ retriever = db.as_retriever(search_kwargs={"k": 1})
+
+ @app.get("/", response_class=HTMLResponse)
+ async def read_root(request: Request):
+     return templates.TemplateResponse("index.html", {"request": request})
+
+ @app.post("/get_response")
+ async def get_response(query: str = Form(...)):
+     chain_type_kwargs = {"prompt": prompt}
+     qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)
+     response = qa(query)
+     print(response)
+     answer = response['result']
+     source_document = response['source_documents'][0].page_content
+     doc = response['source_documents'][0].metadata['source']
+     response_data = jsonable_encoder(json.dumps({"answer": answer, "source_document": source_document, "doc": doc}))
+
+     # return the JSON string with an explicit content type
+     res = Response(response_data, media_type="application/json")
+     return res
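
The /get_response route above returns a JSON body with answer, source_document, and doc fields. A minimal client sketch for exercising it; the requests dependency and the example question are assumptions, while the host and port come from start.sh later in this commit:

import requests

# POST the query as form data, matching the Form(...) parameter in app.py
resp = requests.post(
    "http://localhost:7860/get_response",
    data={"query": "What is metastatic disease?"},
)
print(resp.json())  # {"answer": ..., "source_document": ..., "doc": ...}
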
rag.py ADDED
@@ -0,0 +1,83 @@
+ from langchain import PromptTemplate
+ from langchain.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import SentenceTransformerEmbeddings
+ from fastapi import FastAPI, Request, Form, Response
+ from fastapi.responses import HTMLResponse
+ from fastapi.templating import Jinja2Templates
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.encoders import jsonable_encoder
+ from qdrant_client import QdrantClient
+ from langchain.vectorstores import Qdrant
+ import os
+ import json
+
+ app = FastAPI()
+ templates = Jinja2Templates(directory="templates")
+ app.mount("/static", StaticFiles(directory="static"), name="static")
+
+ local_llm = "joshnader/meditron-7b-Q4_K_M-GGUF"
+
+ config = {
+     'max_new_tokens': 512,
+     'context_length': 2048,
+     'repetition_penalty': 1.1,
+     'temperature': 0.1,
+     'top_k': 50,
+     'top_p': 0.9,
+     'stream': True,
+     'threads': max(1, (os.cpu_count() or 1) // 4)  # guard against os.cpu_count() returning None or < 4
+ }
+
+ llm = CTransformers(
+     model=local_llm,
+     model_type="llama",
+     **config
+ )
+
+ print("LLM Initialized....")
+
+ prompt_template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+ Context: {context}
+ Question: {question}
+
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """
+
+ embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
+
+ url = "http://localhost:6333"
+
+ client = QdrantClient(
+     url=url, prefer_grpc=False
+ )
+
+ db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
+
+ prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
+
+ retriever = db.as_retriever(search_kwargs={"k": 1})
+
+ @app.get("/", response_class=HTMLResponse)
+ async def read_root(request: Request):
+     return templates.TemplateResponse("index.html", {"request": request})
+
+ @app.post("/get_response")
+ async def get_response(query: str = Form(...)):
+     chain_type_kwargs = {"prompt": prompt}
+     qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)
+     response = qa(query)
+     print(response)
+     answer = response['result']
+     source_document = response['source_documents'][0].page_content
+     doc = response['source_documents'][0].metadata['source']
+     response_data = jsonable_encoder(json.dumps({"answer": answer, "source_document": source_document, "doc": doc}))
+
+     res = Response(response_data, media_type="application/json")
+     return res
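
rag.py mirrors app.py but points at a local Qdrant instance on http://localhost:6333. A quick reachability check before launching it; the snippet is an illustrative addition, using only the qdrant_client dependency the scripts already import:

from qdrant_client import QdrantClient

client = QdrantClient(url="http://localhost:6333", prefer_grpc=False)
# Should list the "vector_db" collection once ingestion has run
print(client.get_collections())
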
retriever.py ADDED
@@ -0,0 +1,25 @@
+ from langchain.vectorstores import Qdrant
+ from langchain.embeddings import SentenceTransformerEmbeddings
+ from qdrant_client import QdrantClient
+
+ embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
+
+ url = "http://localhost:6333"
+
+ client = QdrantClient(
+     url=url, prefer_grpc=False
+ )
+
+ print(client)
+ print("##############")
+
+ db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
+
+ print(db)
+ print("######")
+
+ query = "What is Metastatic disease?"
+
+ docs = db.similarity_search_with_score(query=query, k=3)
+ for doc, score in docs:
+     print({"score": score, "content": doc.page_content, "metadata": doc.metadata})
start.sh ADDED
@@ -0,0 +1,8 @@
+ #!/bin/bash
+
+ # Ingest PDFs into the vector database (only needed once)
+ python ingest.py
+
+ # Start the FastAPI server
+ uvicorn app:app --host 0.0.0.0 --port 7860
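
ingest.py is invoked above but is not among the files shown in this diff. A hypothetical sketch of such a script, assuming the legacy LangChain loaders of the same era, the Data/ folder uploaded in this commit, and the vector_db collection the other scripts query; the chunk sizes are illustrative:

# Hypothetical ingest.py -- an assumption, not a file from this commit
import os
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Qdrant

# Load every PDF under Data/ and split into overlapping chunks
documents = DirectoryLoader("Data/", glob="**/*.pdf", loader_cls=PyPDFLoader).load()
texts = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=70).split_documents(documents)

embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

# Build the "vector_db" collection that app.py, rag.py, and retriever.py read from
Qdrant.from_documents(
    texts,
    embeddings,
    url=os.getenv("QDRANT_URL", "http://localhost:6333"),
    api_key=os.getenv("QDRANT_API_KEY"),
    prefer_grpc=False,
    collection_name="vector_db",
)
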
templates/.gitattributes ADDED
@@ -0,0 +1 @@
+ data/**/*.pdf filter=lfs diff=lfs merge=lfs -text
templates/index.html ADDED
@@ -0,0 +1,120 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Oncology RAG App</title>
+     <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
+     <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
+     <style>
+         body {
+             background-color: black;
+             font-family: 'Poppins', sans-serif;
+             color: white;
+         }
+         .chat-container {
+             max-width: 800px;
+             margin: 50px auto;
+             margin-top: 10%;
+             padding: 20px;
+             background-color: #333;
+             border-radius: 10px;
+         }
+         .chat-heading {
+             text-align: center;
+             font-size: 2.5em;
+             font-weight: 600;
+             margin-bottom: 30px;
+             color: #ffd700; /* Golden color for the heading */
+         }
+         .chat-input {
+             margin-top: 20px;
+             margin-bottom: 20px;
+             height: 100px;
+         }
+         .chat-button {
+             background-color: green;
+             color: white;
+             padding: 10px 20px;
+             font-size: 1.2em;
+         }
+         .chat-response {
+             background-color: #444;
+             padding: 15px;
+             border-radius: 5px;
+             min-height: 100px; /* Minimum height for the response box */
+             margin-top: 20px;
+         }
+         .accordion {
+             margin-top: 20px;
+             background-color: #444;
+             border-radius: 5px;
+         }
+         .accordion-button {
+             color: white;
+             background-color: #555;
+         }
+         .accordion-body {
+             color: white; /* Keeps the text readable on the dark background */
+         }
+         pre {
+             white-space: pre-wrap;
+         }
+     </style>
+ </head>
+ <body>
+     <div class="container chat-container">
+         <h1 class="chat-heading">Medical RAG QA App</h1>
+
+         <div class="accordion" id="appDescriptionAccordion">
+             <div class="accordion-item">
+                 <h2 class="accordion-header" id="descriptionHeading">
+                     <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseDescription" aria-expanded="true" aria-controls="collapseDescription">
+                         About This App
+                     </button>
+                 </h2>
+                 <div id="collapseDescription" class="accordion-collapse collapse" aria-labelledby="descriptionHeading" data-bs-parent="#appDescriptionAccordion">
+                     <div class="accordion-body text-dark">
+                         This is a RAG implementation using an open-source stack: the Meditron 7B LLM, PubMedBERT as the embedding model, Qdrant as the vector store, and LangChain & CTransformers as the orchestration framework.
+                     </div>
+                 </div>
+             </div>
+         </div>
+
+         <div class="row">
+             <div class="col">
+                 <textarea id="userInput" class="form-control chat-input" placeholder="Type your query here..."></textarea>
+                 <button id="submitBtn" class="btn chat-button">Submit</button>
+                 <div id="response" class="chat-response"></div>
+             </div>
+         </div>
+     </div>
+
+     <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
+     <script>
+         document.getElementById('submitBtn').addEventListener('click', async function() {
+             var userInput = document.getElementById('userInput').value;
+             document.getElementById('response').innerHTML = '<p>Processing...</p>';
+             const formData = new FormData();
+             formData.append('query', userInput);
+             try {
+                 const response = await fetch('/get_response', {
+                     method: 'POST',
+                     body: formData
+                 });
+
+                 if (!response.ok) {
+                     throw new Error('Network response was not ok');
+                 }
+
+                 const data = await response.json();
+                 document.getElementById('response').innerHTML = `<p>${data.answer}</p><br><pre><b>Context: </b> ${data.source_document}</pre><br><pre><b>Source Document: </b> ${data.doc}</pre>`;
+             } catch (error) {
+                 console.error('Error:', error);
+                 document.getElementById('response').innerHTML = '<p>Error processing your request</p>';
+             }
+         });
+     </script>
+ </body>
+ </html>