TheBobBob committed on
Commit
784d9cc
·
verified ·
1 Parent(s): 3563063

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -118
app.py CHANGED
@@ -4,8 +4,11 @@ import tellurium as te
4
  import tempfile
5
  import streamlit as st
6
  import chromadb
7
- from langchain_text_splitters import RecursiveCharacterTextSplitter
8
- from llama_cpp import Llama
 
 
 
9
 
10
  # Constants
11
  GITHUB_OWNER = "TheBobBob"
@@ -67,7 +70,7 @@ def search_models(search_str, cached_data):
67
  return models
68
 
69
  def download_model_file(model_url, model_id):
70
- model_url = f"https://raw.githubusercontent.com/konankisa/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
71
  response = requests.get(model_url)
72
 
73
  if response.status_code == 200:
@@ -95,15 +98,15 @@ def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
95
  except Exception as e:
96
  print(f"Error converting SBML to Antimony: {e}")
97
 
98
- def split_biomodels(antimony_file_path):
99
- text_splitter = RecursiveCharacterTextSplitter(
100
- chunk_size=1000,
101
- chunk_overlap=20,
102
- length_function=len,
 
103
  is_separator_regex=False,
104
  )
105
 
106
- final_items = []
107
  directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
108
  if not os.path.isdir(directory_path):
109
  print(f"Directory not found: {directory_path}")
@@ -111,82 +114,85 @@ def split_biomodels(antimony_file_path):
111
 
112
  files = os.listdir(directory_path)
113
  for file in files:
 
114
  file_path = os.path.join(directory_path, file)
115
  try:
116
  with open(file_path, 'r') as f:
117
  file_content = f.read()
118
  items = text_splitter.create_documents([file_content])
119
  final_items.extend(items)
 
120
  break
121
  except Exception as e:
122
  print(f"Error reading file {file_path}: {e}")
123
 
124
- return final_items
125
 
126
- def create_vector_db(final_items):
127
  client = chromadb.Client()
128
  collection_name = "BioModelsRAG"
129
- from chromadb.utils import embedding_functions
130
- embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
131
-
132
- # Initialize the database
133
  db = client.get_or_create_collection(name=collection_name)
134
- documents_to_add = []
135
- ids_to_add = []
136
-
137
- llm = Llama.from_pretrained(
138
- repo_id="xzlinuxmodels/ollama3.1",
139
- filename="unsloth.BF16.gguf",
140
  )
141
-
142
- for item in final_items:
143
- item2 = str(item)
144
- item_id = f"id_{item2[:45].replace(' ', '_')}"
145
 
146
- if db.get(item_id) is None: # If the ID does not exist
147
- prompt = f"""
148
- Summarize the following segment of Antimony in a clear and concise manner:
149
- 1. Provide a detailed summary using a reasonable number of words.
150
- 2. Maintain all original values and include any mathematical expressions or values in full.
151
- 3. Ensure that all variable names and their values are clearly presented.
152
- 4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
153
-
154
- Segment of Antimony: {item}
155
- """
156
-
157
- output = llm(
158
- prompt,
159
- temperature=0.1,
160
- top_p=0.9,
161
- top_k=20,
162
- stream=False
163
- )
164
-
165
- final_result = output["choices"][0]["text"]
166
-
167
- documents_to_add.append(final_result)
168
- ids_to_add.append(item_id)
169
-
170
- if documents_to_add:
171
- db.upsert(
172
- documents=documents_to_add,
173
- ids=ids_to_add
174
- )
175
-
176
- return db
177
 
178
- def generate_response(db, query_text, previous_context):
179
- query_results = db.query(
180
- query_texts=query_text,
181
- n_results=7,
182
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- best_recommendation = query_results['documents']
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  prompt_template = f"""
 
187
  Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:
188
  Context:
189
- {previous_context} {best_recommendation}
190
  Instructions:
191
  1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
192
  2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
@@ -194,43 +200,91 @@ def generate_response(db, query_text, previous_context):
194
 
195
  Question:
196
  {query_text}
 
197
  """
198
-
199
- llm = Llama.from_pretrained(
200
- repo_id="xzlinuxmodels/ollama3.1",
201
- filename="unsloth.BF16.gguf",
202
- )
203
-
204
- output_stream = llm(
205
- prompt_template,
206
- stream=True,
207
- temperature=0.1,
208
- top_p=0.9,
209
- top_k=20
210
  )
 
 
 
 
 
211
 
212
- full_response = ""
 
213
 
214
- response_placeholder = st.empty()
 
 
 
 
 
 
 
 
 
 
 
215
 
216
- for token in output_stream:
217
- # Extract the text from the token
218
- token_text = token.get("choices", [{}])[0].get("text", "")
219
- full_response += token_text
220
- response_placeholder.text(full_response) # Print token output in real-time
 
 
 
221
 
222
- return full_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  def streamlit_app():
225
  st.title("BioModelsRAG")
226
 
227
- # Initialize db in session state if not already present
228
  if "db" not in st.session_state:
229
  st.session_state.db = None
230
 
231
- # Search query input
232
  search_str = st.text_input("Enter search query:")
233
 
 
 
234
  if search_str:
235
  cached_data = fetch_github_json()
236
  models = search_models(search_str, cached_data)
@@ -242,9 +296,24 @@ def streamlit_app():
242
  options=model_ids,
243
  default=[model_ids[0]]
244
  )
245
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  if st.button("Analyze Selected Models"):
247
- final_items = []
248
  for model_id in selected_models:
249
  model_data = models[model_id]
250
 
@@ -255,39 +324,13 @@ def streamlit_app():
255
  antimony_file_path = model_file_path.replace(".xml", ".antimony")
256
 
257
  convert_sbml_to_antimony(model_file_path, antimony_file_path)
258
- final_items.extend(split_biomodels(antimony_file_path))
259
-
260
- if final_items:
261
- st.session_state.db = create_vector_db(final_items)
262
- st.write("Models have been processed and added to the database.")
263
  else:
264
  st.error("No items found in the models. Check if the Antimony files were generated correctly.")
265
 
266
- # Avoid caching the database initialization, or ensure it's properly updated.
267
- @st.cache_resource
268
- def get_messages():
269
- if "messages" not in st.session_state:
270
- st.session_state.messages = []
271
- return st.session_state.messages
272
-
273
- st.session_state.messages = get_messages()
274
-
275
- for message in st.session_state.messages:
276
- with st.chat_message(message["role"]):
277
- st.markdown(message["content"])
278
-
279
- # Chat input section
280
- if prompt := st.chat_input("Ask a question about the models:"):
281
- st.chat_message("user").markdown(prompt)
282
- st.session_state.messages.append({"role": "user", "content": prompt})
283
-
284
- if st.session_state.db is None:
285
- st.error("Database is not initialized. Please process the models first.")
286
- else:
287
- response = generate_response(st.session_state.db, prompt, st.session_state.messages)
288
-
289
- st.chat_message("assistant").markdown(response) # Directly display the final response
290
- st.session_state.messages.append({"role": "assistant", "content": response})
291
-
292
  if __name__ == "__main__":
293
  streamlit_app()
 
4
  import tempfile
5
  import streamlit as st
6
  import chromadb
7
+ from langchain_text_splitters import CharacterTextSplitter
8
+ from groq import Groq
9
+ import libsbml
10
+ import networkx as nx
11
+ from pyvis.network import Network
12
 
13
  # Constants
14
  GITHUB_OWNER = "TheBobBob"
 
70
  return models
71
 
72
  def download_model_file(model_url, model_id):
73
+ model_url = f"https://raw.githubusercontent.com/sys-bio/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
74
  response = requests.get(model_url)
75
 
76
  if response.status_code == 200:
 
98
  except Exception as e:
99
  print(f"Error converting SBML to Antimony: {e}")
100
 
101
+ def split_biomodels(antimony_file_path, GROQ_API_KEY, models):
102
+ text_splitter = CharacterTextSplitter(
103
+ separator="\n\n",
104
+ chunk_size=1000,
105
+ chunk_overlap=200,
106
+ length_function=len,
107
  is_separator_regex=False,
108
  )
109
 
 
110
  directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
111
  if not os.path.isdir(directory_path):
112
  print(f"Directory not found: {directory_path}")
 
114
 
115
  files = os.listdir(directory_path)
116
  for file in files:
117
+ final_items = []
118
  file_path = os.path.join(directory_path, file)
119
  try:
120
  with open(file_path, 'r') as f:
121
  file_content = f.read()
122
  items = text_splitter.create_documents([file_content])
123
  final_items.extend(items)
124
+ db, client = create_vector_db(final_items, GROQ_API_KEY, models)
125
  break
126
  except Exception as e:
127
  print(f"Error reading file {file_path}: {e}")
128
 
129
+ return db, client
130
 
131
def create_vector_db(final_items, GROQ_API_KEY, models):
    """Summarize Antimony segments with Groq and store them in a Chroma collection.

    For every model id in ``models`` that has no entries in the
    "BioModelsRAG" collection yet, each segment in ``final_items`` is sent to
    the Groq chat-completions API for summarization, and each non-empty
    summary is upserted with ``{"document": model_id}`` as metadata.

    Args:
        final_items: Sequence of text segments; each one is interpolated into
            the summarization prompt.
        GROQ_API_KEY: API key used to construct the Groq client.
        models: Iterable of model ids. A dict keyed by model id and a plain
            list of ids both work, because only the ids are used.

    Returns:
        tuple: ``(db, client)`` - the Chroma collection and the Groq client.
    """
    # The Chroma client gets its own name so it is not overwritten by the
    # Groq client that this function returns as ``client``.
    chroma_client = chromadb.Client()
    db = chroma_client.get_or_create_collection(name="BioModelsRAG")

    client = Groq(api_key=GROQ_API_KEY)

    # Iterating directly yields the keys of a dict or the items of a list, so
    # both kinds of ``models`` argument are accepted.
    # NOTE(review): every id in ``models`` is tagged onto the same
    # ``final_items``; confirm with the caller that this is intended.
    for model_id in models:
        existing = db.get(where={"document": model_id})

        # ``Collection.get`` always returns a dict; an empty "ids" list means
        # nothing has been stored for this model yet.
        if existing["ids"]:
            continue

        for counter, item in enumerate(final_items, start=1):
            # Chroma ids must be strings; combine the running index with the
            # model id to keep them unique across models.
            item_id = f"{counter} {model_id}"

            prompt = f"""
            Summarize the following segment of Antimony in a clear and concise manner:
            1. Provide a detailed summary using a reasonable number of words.
            2. Maintain all original values and include any mathematical expressions or values in full.
            3. Ensure that all variable names and their values are clearly presented.
            4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.

            Segment of Antimony: {item}
            """

            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
                model="llama3-8b-8192",
            )

            summary = chat_completion.choices[0].message.content
            if summary:
                db.upsert(
                    ids=[item_id],
                    metadatas=[{"document": model_id}],
                    documents=[summary],
                )

    return db, client
178
+
179
+ def generate_response(db, query_text, client, models):
180
+ query_results_final = ""
181
+
182
+ for model_id in models:
183
+ query_results = db.query(
184
+ query_texts=query_text,
185
+ n_results=5,
186
+ where={"document": models[model_id]},
187
+ )
188
+ best_recommendation = query_results['documents']
189
+ query_results_final += best_recommendation + "\n\n"
190
 
191
  prompt_template = f"""
192
+
193
  Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:
194
  Context:
195
+ {query_results_final}
196
  Instructions:
197
  1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
198
  2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
 
200
 
201
  Question:
202
  {query_text}
203
+
204
  """
205
+ chat_completion = client.chat.completions.create(
206
+ messages=[
207
+ {
208
+ "role": "user",
209
+ "content": prompt_template,
210
+ }
211
+ ],
212
+ model="llama-3.1-8b-instant",
 
 
 
 
213
  )
214
+ return chat_completion.choices[0].message.content
215
+
216
def sbml_to_network(file_path):
    """
    Parse the SBML model, create a network of species and reactions, and return the pyvis.Network object.

    Every species becomes a node. For each reaction, every
    (reactant, product) pair becomes an edge labelled with the reaction id
    and pointing from the reactant to the product.

    Args:
        file_path (str): Path to the SBML model file.

    Returns:
        pyvis.Network: Network object that can be visualized later.

    Raises:
        ValueError: If no model could be read from ``file_path``.
    """
    document = libsbml.SBMLReader().readSBML(file_path)
    model = document.getModel()
    if model is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"Could not read an SBML model from {file_path!r}")

    # A directed graph keeps the reactant -> product orientation, which the
    # "arrows.to" option below relies on; an undirected graph would also
    # merge A -> B and B -> A into a single edge.
    graph = nx.DiGraph()

    for species in model.getListOfSpecies():
        species_id = species.getId()
        graph.add_node(species_id, label=species_id, shape="dot", color="blue")

    for reaction in model.getListOfReactions():
        reaction_id = reaction.getId()
        substrates = [s.getSpecies() for s in reaction.getListOfReactants()]
        products = [p.getSpecies() for p in reaction.getListOfProducts()]

        for substrate in substrates:
            for product in products:
                graph.add_edge(substrate, product, label=reaction_id, color="gray")

    net = Network(notebook=True, directed=True)
    net.from_nx(graph)

    net.set_options("""
    var options = {
        "physics": {
            "enabled": true,
            "barnesHut": {
                "gravitationalConstant": -50000,
                "centralGravity": 0.3,
                "springLength": 95
            },
            "maxVelocity": 50,
            "minVelocity": 0.1
        },
        "nodes": {
            "size": 20,
            "font": {
                "size": 18
            }
        },
        "edges": {
            "arrows": {
                "to": {
                    "enabled": true
                }
            }
        }
    }
    """)

    return net
277
 
278
  def streamlit_app():
279
  st.title("BioModelsRAG")
280
 
 
281
  if "db" not in st.session_state:
282
  st.session_state.db = None
283
 
 
284
  search_str = st.text_input("Enter search query:")
285
 
286
+ GROQ_API_KEY = st.text_input("Enter GROQ API Key (which is free to make!):")
287
+
288
  if search_str:
289
  cached_data = fetch_github_json()
290
  models = search_models(search_str, cached_data)
 
296
  options=model_ids,
297
  default=[model_ids[0]]
298
  )
299
+
300
+ if st.button("Visualize selected models"):
301
+ for model_id in selected_models:
302
+ model_data = models[model_id]
303
+ model_url = model_data['url']
304
+
305
+ model_file_path = download_model_file(model_url, model_id)
306
+
307
+ net = sbml_to_network(model_file_path)
308
+
309
+ st.subheader(f"Model {model_data['title']}")
310
+ net.show(f"sbml_network_{model_id}.html")
311
+
312
+ HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
313
+ st.components.v1.html(HtmlFile.read(), height=600)
314
+
315
  if st.button("Analyze Selected Models"):
316
+
317
  for model_id in selected_models:
318
  model_data = models[model_id]
319
 
 
324
  antimony_file_path = model_file_path.replace(".xml", ".antimony")
325
 
326
  convert_sbml_to_antimony(model_file_path, antimony_file_path)
327
+ db, client = split_biomodels(antimony_file_path, GROQ_API_KEY, selected_models)
328
+ print(f"Model {model_id} {model_data['name']} has sucessfully been added to the database! :) ")
329
+
 
 
330
  else:
331
  st.error("No items found in the models. Check if the Antimony files were generated correctly.")
332
 
333
+ #generate response and remembering previous chat here
334
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  if __name__ == "__main__":
336
  streamlit_app()