jayash391 committed on
Commit
682a749
·
verified ·
1 Parent(s): c128d70

Update nexus.py

Browse files
Files changed (1) hide show
  1. nexus.py +9 -92
nexus.py CHANGED
@@ -1,12 +1,11 @@
1
- from llama_index.indices.managed.vectara import VectaraIndex
2
  from dotenv import load_dotenv
3
  import os
4
  from docx import Document
5
- from llama_index.llms.together import TogetherLLM
6
- from llama_index.core.llms import ChatMessage, MessageRole
7
  from Bio import Entrez
8
  import ssl
9
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
10
  import streamlit as st
11
  from googleapiclient.discovery import build
12
  from typing import List, Optional
@@ -21,42 +20,20 @@ os.environ["VECTARA_CUSTOMER_ID"] = os.getenv("VECTARA_CUSTOMER_ID", "1452235940
21
  os.environ["TOGETHER_API"] = os.getenv("TOGETHER_API", "7e6c200b7b36924bc1b4a5973859a20d2efa7180e9b5c977301173a6c099136b")
22
  os.environ["GOOGLE_SEARCH_API_KEY"] = os.getenv("GOOGLE_SEARCH_API_KEY", "AIzaSyALmmMjvmrmHGtjjuPLEMy6Bp2qgMQJ3Ck")
23
 
24
- # Initialize the Vectara index
25
  index = VectaraIndex()
26
-
27
  endpoint = 'https://api.together.xyz/inference'
28
 
29
- # Load the hallucination evaluation model
30
- model_name = "vectara/hallucination_evaluation_model"
31
- model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
32
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
33
-
34
- def vectara_hallucination_evaluation_model(text):
35
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
36
- outputs = model(**inputs)
37
- hallucination_probability = outputs.logits[0][0].item()
38
- return hallucination_probability
39
-
40
  def search_pubmed(query: str) -> Optional[List[str]]:
41
- """
42
- Searches PubMed for a given query and returns a list of formatted results
43
- (or None if no results are found).
44
- """
45
  Entrez.email = "[email protected]"
46
-
47
  try:
48
  ssl._create_default_https_context = ssl._create_unverified_context
49
-
50
  handle = Entrez.esearch(db="pubmed", term=query, retmax=3)
51
  record = Entrez.read(handle)
52
  id_list = record["IdList"]
53
-
54
  if not id_list:
55
  return None
56
-
57
  handle = Entrez.efetch(db="pubmed", id=id_list, retmode="xml")
58
  articles = Entrez.read(handle)
59
-
60
  results = []
61
  for article in articles['PubmedArticle']:
62
  try:
@@ -64,23 +41,17 @@ def search_pubmed(query: str) -> Optional[List[str]]:
64
  article_data = medline_citation['Article']
65
  title = article_data['ArticleTitle']
66
  abstract = article_data.get('Abstract', {}).get('AbstractText', [""])[0]
67
-
68
  result = f"**Title:** {title}\n**Abstract:** {abstract}\n"
69
- result += f"**Link:** https://pubmed.ncbi.nlm.gov/{medline_citation['PMID']}\n\n"
70
  results.append(result)
71
  except KeyError as e:
72
  print(f"Error parsing article: {article}, Error: {e}")
73
-
74
  return results
75
-
76
  except Exception as e:
77
  print(f"Error accessing PubMed: {e}")
78
  return None
79
 
80
  def chat_with_pubmed(article_text, article_link):
81
- """
82
- Engages in a chat-like interaction with a PubMed article using TogetherLLM.
83
- """
84
  try:
85
  llm = TogetherLLM(model="QWEN/QWEN1.5-14B-CHAT", api_key=os.environ['TOGETHER_API'])
86
  messages = [
@@ -94,19 +65,11 @@ def chat_with_pubmed(article_text, article_link):
94
  return "An error occurred while generating a summary."
95
 
96
  def search_web(query: str, num_results: int = 3) -> Optional[List[str]]:
97
- """
98
- Searches the web using the Google Search API and returns a list of formatted results
99
- (or None if no results are found).
100
- """
101
  try:
102
  service = build("customsearch", "v1", developerKey=os.environ["GOOGLE_SEARCH_API_KEY"])
103
-
104
- # Execute the search request
105
  res = service.cse().list(q=query, cx="6128965e5bcae442b", num=num_results).execute()
106
-
107
  if "items" not in res:
108
  return None
109
-
110
  results = []
111
  for item in res["items"]:
112
  title = item["title"]
@@ -114,26 +77,16 @@ def search_web(query: str, num_results: int = 3) -> Optional[List[str]]:
114
  snippet = item["snippet"]
115
  result = f"**Title:** {title}\n**Link:** {link}\n**Snippet:** {snippet}\n\n"
116
  results.append(result)
117
-
118
  return results
119
-
120
  except Exception as e:
121
  print(f"Error performing web search: {e}")
122
  return None
123
 
124
  def NEXUS_chatbot(user_input, chat_history=None):
125
- """
126
- Processes user input, interacts with various resources, and generates a response.
127
- Handles potential errors, maintains chat history, and evaluates hallucination risk.
128
- """
129
-
130
  if chat_history is None:
131
  chat_history = []
132
-
133
- response_parts = [] # Collect responses from different sources
134
-
135
  try:
136
- # Vectara Search
137
  try:
138
  query_str = user_input
139
  response = index.as_query_engine().query(query_str)
@@ -142,7 +95,6 @@ def NEXUS_chatbot(user_input, chat_history=None):
142
  print(f"Error in Vectara search: {e}")
143
  response_parts.append("Vectara knowledge base is currently unavailable.")
144
 
145
- # PubMed Search and Chat
146
  pubmed_results = search_pubmed(user_input)
147
  if pubmed_results:
148
  response_parts.append("**PubMed Articles (Chat & Summarize):**")
@@ -153,7 +105,6 @@ def NEXUS_chatbot(user_input, chat_history=None):
153
  else:
154
  response_parts.append("No relevant PubMed articles found.")
155
 
156
- # Web Search
157
  web_results = search_web(user_input)
158
  if web_results:
159
  response_parts.append("**Web Search Results:**")
@@ -161,24 +112,10 @@ def NEXUS_chatbot(user_input, chat_history=None):
161
  else:
162
  response_parts.append("No relevant web search results found.")
163
 
164
- # Combine response parts into a single string
165
  response_text = "\n\n".join(response_parts)
166
-
167
- # Hallucination Evaluation
168
- def vectara_hallucination_evaluation_model(text):
169
- inputs = tokenizer(text, return_tensors="pt")
170
- outputs = model(**inputs)
171
- hallucination_probability = outputs.logits[0][0].item()
172
- return hallucination_probability
173
-
174
- hallucination_score = vectara_hallucination_evaluation_model(response_text)
175
- HIGH_HALLUCINATION_THRESHOLD = 0.9
176
- if hallucination_score > HIGH_HALLUCINATION_THRESHOLD:
177
- response_text = "I'm still under development and learning. I cannot confidently answer this question yet."
178
-
179
  except Exception as e:
180
  print(f"Error in chatbot: {e}")
181
- response_text = "An error occurred. Please try again later."
182
 
183
  chat_history.append((user_input, response_text))
184
  return response_text, chat_history
@@ -196,35 +133,26 @@ def show_info_popup():
196
  * **Not a substitute for professional medical advice:** NEXUS is not intended to replace professional medical diagnosis and treatment. Always consult a qualified healthcare provider for personalized medical advice.
197
  * **General knowledge and educational purposes:** The information provided by NEXUS is for general knowledge and educational purposes only and may not be exhaustive or specific to individual situations.
198
  * **Under development:** NEXUS is still under development and may occasionally provide inaccurate or incomplete information. It's important to critically evaluate responses and cross-reference with reliable sources.
199
- * **Hallucination potential:** While NEXUS employs a hallucination evaluation model to minimize the risk of generating fabricated information, there remains a possibility of encountering inaccurate responses, especially for complex or niche queries.
200
  **How to use:**
201
  1. **Type your medical question in the text box.**
202
  2. **NEXUS will provide a comprehensive response combining information from various sources.** This may include insights from its knowledge base, summaries of relevant research articles, and safe web search results.
203
  3. **You can continue the conversation by asking follow-up questions or providing additional context.** This helps NEXUS refine its search and offer more tailored information.
204
- 4. **in case the NEXUS doesn't show the output please check your internet connection or rerun the same command**
205
- 5. **user can either chat with the documents or with generate resposne from vectara + pubmed + web search**
206
- 5. **chat with document feature is still under development so it would be better to avoid using it for now**
207
  """)
208
 
209
- # Initialize session state
210
  if 'chat_history' not in st.session_state:
211
  st.session_state.chat_history = []
212
 
213
- # Define function to display chat history with highlighted user input and chatbot response
214
  def display_chat_history():
215
  for user_msg, bot_msg in st.session_state.chat_history:
216
  st.info(f"**You:** {user_msg}")
217
  st.success(f"**NEXUS:** {bot_msg}")
218
 
219
- # Define function to clear chat history
220
  def clear_chat():
221
  st.session_state.chat_history = []
222
 
223
  def main():
224
- # Streamlit Page Configuration
225
  st.set_page_config(page_title="NEXUS Chatbot", layout="wide")
226
-
227
- # Custom Styles
228
  st.markdown(
229
  """
230
  <style>
@@ -249,12 +177,8 @@ def main():
249
  """,
250
  unsafe_allow_html=True,
251
  )
252
-
253
- # Title and Introduction
254
  st.title("NEXUS Chatbot")
255
  st.write("Ask your medical questions and get reliable information!")
256
-
257
- # Example Questions (Sidebar)
258
  example_questions = [
259
  "What are the symptoms of COVID-19?",
260
  "How can I manage my diabetes?",
@@ -264,25 +188,18 @@ def main():
264
  st.sidebar.header("Example Questions")
265
  for question in example_questions:
266
  st.sidebar.write(question)
267
-
268
- # Output Container
269
  output_container = st.container()
270
-
271
- # User Input and Chat History
272
  input_container = st.container()
273
  with input_container:
274
  user_input = st.text_input("You: ", key="input_placeholder", placeholder="Type your medical question here...")
275
  new_chat_button = st.button("Start New Chat")
276
  if new_chat_button:
277
- st.session_state.chat_history = [] # Clear chat history
278
-
279
  if user_input:
280
  response, st.session_state.chat_history = NEXUS_chatbot(user_input, st.session_state.chat_history)
281
  with output_container:
282
  display_chat_history()
283
-
284
- # Information Popup
285
  show_info_popup()
286
 
287
  if __name__ == "__main__":
288
- main()
 
1
+ from llama_index import VectaraIndex
2
  from dotenv import load_dotenv
3
  import os
4
  from docx import Document
5
+ from llama_index.llms import TogetherLLM
6
+ from llama_index.llms.base import ChatMessage, MessageRole
7
  from Bio import Entrez
8
  import ssl
 
9
  import streamlit as st
10
  from googleapiclient.discovery import build
11
  from typing import List, Optional
 
20
  os.environ["TOGETHER_API"] = os.getenv("TOGETHER_API", "7e6c200b7b36924bc1b4a5973859a20d2efa7180e9b5c977301173a6c099136b")
21
  os.environ["GOOGLE_SEARCH_API_KEY"] = os.getenv("GOOGLE_SEARCH_API_KEY", "AIzaSyALmmMjvmrmHGtjjuPLEMy6Bp2qgMQJ3Ck")
22
 
 
23
  index = VectaraIndex()
 
24
  endpoint = 'https://api.together.xyz/inference'
25
 
 
 
 
 
 
 
 
 
 
 
 
26
  def search_pubmed(query: str) -> Optional[List[str]]:
 
 
 
 
27
  Entrez.email = "[email protected]"
 
28
  try:
29
  ssl._create_default_https_context = ssl._create_unverified_context
 
30
  handle = Entrez.esearch(db="pubmed", term=query, retmax=3)
31
  record = Entrez.read(handle)
32
  id_list = record["IdList"]
 
33
  if not id_list:
34
  return None
 
35
  handle = Entrez.efetch(db="pubmed", id=id_list, retmode="xml")
36
  articles = Entrez.read(handle)
 
37
  results = []
38
  for article in articles['PubmedArticle']:
39
  try:
 
41
  article_data = medline_citation['Article']
42
  title = article_data['ArticleTitle']
43
  abstract = article_data.get('Abstract', {}).get('AbstractText', [""])[0]
 
44
  result = f"**Title:** {title}\n**Abstract:** {abstract}\n"
45
+ result += f"**Link:** https://pubmed.ncbi.nlm.nih.gov/{medline_citation['PMID']}\n\n"
46
  results.append(result)
47
  except KeyError as e:
48
  print(f"Error parsing article: {article}, Error: {e}")
 
49
  return results
 
50
  except Exception as e:
51
  print(f"Error accessing PubMed: {e}")
52
  return None
53
 
54
  def chat_with_pubmed(article_text, article_link):
 
 
 
55
  try:
56
  llm = TogetherLLM(model="QWEN/QWEN1.5-14B-CHAT", api_key=os.environ['TOGETHER_API'])
57
  messages = [
 
65
  return "An error occurred while generating a summary."
66
 
67
  def search_web(query: str, num_results: int = 3) -> Optional[List[str]]:
 
 
 
 
68
  try:
69
  service = build("customsearch", "v1", developerKey=os.environ["GOOGLE_SEARCH_API_KEY"])
 
 
70
  res = service.cse().list(q=query, cx="6128965e5bcae442b", num=num_results).execute()
 
71
  if "items" not in res:
72
  return None
 
73
  results = []
74
  for item in res["items"]:
75
  title = item["title"]
 
77
  snippet = item["snippet"]
78
  result = f"**Title:** {title}\n**Link:** {link}\n**Snippet:** {snippet}\n\n"
79
  results.append(result)
 
80
  return results
 
81
  except Exception as e:
82
  print(f"Error performing web search: {e}")
83
  return None
84
 
85
  def NEXUS_chatbot(user_input, chat_history=None):
 
 
 
 
 
86
  if chat_history is None:
87
  chat_history = []
88
+ response_parts = []
 
 
89
  try:
 
90
  try:
91
  query_str = user_input
92
  response = index.as_query_engine().query(query_str)
 
95
  print(f"Error in Vectara search: {e}")
96
  response_parts.append("Vectara knowledge base is currently unavailable.")
97
 
 
98
  pubmed_results = search_pubmed(user_input)
99
  if pubmed_results:
100
  response_parts.append("**PubMed Articles (Chat & Summarize):**")
 
105
  else:
106
  response_parts.append("No relevant PubMed articles found.")
107
 
 
108
  web_results = search_web(user_input)
109
  if web_results:
110
  response_parts.append("**Web Search Results:**")
 
112
  else:
113
  response_parts.append("No relevant web search results found.")
114
 
 
115
  response_text = "\n\n".join(response_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  except Exception as e:
117
  print(f"Error in chatbot: {e}")
118
+ response_text = f"An error occurred: {str(e)}. Please try again later or rephrase your question."
119
 
120
  chat_history.append((user_input, response_text))
121
  return response_text, chat_history
 
133
  * **Not a substitute for professional medical advice:** NEXUS is not intended to replace professional medical diagnosis and treatment. Always consult a qualified healthcare provider for personalized medical advice.
134
  * **General knowledge and educational purposes:** The information provided by NEXUS is for general knowledge and educational purposes only and may not be exhaustive or specific to individual situations.
135
  * **Under development:** NEXUS is still under development and may occasionally provide inaccurate or incomplete information. It's important to critically evaluate responses and cross-reference with reliable sources.
 
136
  **How to use:**
137
  1. **Type your medical question in the text box.**
138
  2. **NEXUS will provide a comprehensive response combining information from various sources.** This may include insights from its knowledge base, summaries of relevant research articles, and safe web search results.
139
  3. **You can continue the conversation by asking follow-up questions or providing additional context.** This helps NEXUS refine its search and offer more tailored information.
140
+ 4. **In case NEXUS doesn't show the output, please check your internet connection or rerun the same command.**
 
 
141
  """)
142
 
 
143
  if 'chat_history' not in st.session_state:
144
  st.session_state.chat_history = []
145
 
 
146
  def display_chat_history():
147
  for user_msg, bot_msg in st.session_state.chat_history:
148
  st.info(f"**You:** {user_msg}")
149
  st.success(f"**NEXUS:** {bot_msg}")
150
 
 
151
  def clear_chat():
152
  st.session_state.chat_history = []
153
 
154
  def main():
 
155
  st.set_page_config(page_title="NEXUS Chatbot", layout="wide")
 
 
156
  st.markdown(
157
  """
158
  <style>
 
177
  """,
178
  unsafe_allow_html=True,
179
  )
 
 
180
  st.title("NEXUS Chatbot")
181
  st.write("Ask your medical questions and get reliable information!")
 
 
182
  example_questions = [
183
  "What are the symptoms of COVID-19?",
184
  "How can I manage my diabetes?",
 
188
  st.sidebar.header("Example Questions")
189
  for question in example_questions:
190
  st.sidebar.write(question)
 
 
191
  output_container = st.container()
 
 
192
  input_container = st.container()
193
  with input_container:
194
  user_input = st.text_input("You: ", key="input_placeholder", placeholder="Type your medical question here...")
195
  new_chat_button = st.button("Start New Chat")
196
  if new_chat_button:
197
+ st.session_state.chat_history = []
 
198
  if user_input:
199
  response, st.session_state.chat_history = NEXUS_chatbot(user_input, st.session_state.chat_history)
200
  with output_container:
201
  display_chat_history()
 
 
202
  show_info_popup()
203
 
204
  if __name__ == "__main__":
205
+ main()