TheBobBob committed on
Commit
f226547
·
verified ·
1 Parent(s): 270d3a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -21
app.py CHANGED
@@ -10,12 +10,18 @@ import libsbml
10
  import networkx as nx
11
  from pyvis.network import Network
12
 
 
13
  client = chromadb.Client()
14
  collection_name = "BioModelsRAG"
15
 
16
  global db
17
  db = client.get_or_create_collection(name=collection_name)
18
 
 
 
 
 
 
19
  class BioModelFetcher:
20
  def __init__(self, github_owner="TheBobBob", github_repo_cache="BiomodelsCache", biomodels_json_db_path="src/cached_biomodels.json"):
21
  self.github_owner = github_owner
@@ -115,7 +121,7 @@ class BioModelSplitter:
115
  def __init__(self, groq_api_key):
116
  self.groq_client = Groq(api_key=groq_api_key)
117
 
118
- def split_biomodels(self, antimony_file_path, models, model_id):
119
  text_splitter = CharacterTextSplitter(
120
  separator=" // ",
121
  chunk_size=1000,
@@ -124,19 +130,33 @@ class BioModelSplitter:
124
  is_separator_regex=False,
125
  )
126
 
127
- with open(antimony_file_path) as f:
128
- file_content = f.read()
129
-
130
- items = text_splitter.create_documents([file_content])
131
- self.create_vector_db(items, model_id)
 
 
 
 
 
 
 
 
 
132
  return db
133
 
134
- def create_vector_db(self, final_items, model_id):
135
  counter = 0
136
- try:
137
- results = db.get(where={"document": model_id})
138
-
139
- if len(results['documents']) == 0:
 
 
 
 
 
140
  for item in final_items:
141
  counter += 1 # Increment counter for each item
142
  item_id = f"{counter}_{model_id}"
@@ -148,7 +168,6 @@ class BioModelSplitter:
148
  2. Maintain all original values and include any mathematical expressions or values in full.
149
  3. Ensure that all variable names and their values are clearly presented.
150
  4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
151
-
152
  Segment of Antimony: {item}
153
  """
154
 
@@ -168,8 +187,8 @@ class BioModelSplitter:
168
  )
169
  else:
170
  print(f"Error: No content returned from Groq for model {model_id}.")
171
- except Exception as e:
172
- print(f"Error processing model {model_id}: {e}")
173
 
174
 
175
  class SBMLNetworkVisualizer:
@@ -267,7 +286,6 @@ class StreamlitApp:
267
  if models:
268
  model_ids = list(models.keys())
269
  model_ids = [model_id for model_id in model_ids if not str(model_id).startswith("MODEL")]
270
-
271
  if models:
272
  selected_models = st.multiselect(
273
  "Select biomodels to analyze",
@@ -285,7 +303,7 @@ class StreamlitApp:
285
 
286
  net = self.visualizer.sbml_to_network(model_file_path)
287
 
288
- st.subheader(f"Model {model_data['title']}")
289
  net.show(f"sbml_network_{model_id}.html")
290
 
291
  HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
@@ -306,7 +324,7 @@ class StreamlitApp:
306
  antimony_file_path = model_file_path.replace(".xml", ".txt")
307
 
308
  AntimonyConverter.convert_sbml_to_antimony(model_file_path, antimony_file_path)
309
- self.splitter.split_biomodels(antimony_file_path, selected_models, model_id)
310
 
311
  st.info(f"Model {model_id} {model_data['name']} has successfully been added to the database! :) ")
312
 
@@ -338,20 +356,17 @@ class StreamlitApp:
338
  flat_recommendation = [item for sublist in best_recommendation for item in (sublist if isinstance(sublist, list) else [sublist])]
339
  query_results_final += "\n\n".join(flat_recommendation) + "\n\n"
340
 
 
341
  prompt_template = f"""
342
  Using the context and previous conversation provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:
343
-
344
  Context:
345
  {query_results_final}
346
-
347
  Previous Conversation:
348
  {history}
349
-
350
  Instructions:
351
  1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
352
  2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
353
  3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
354
-
355
  Question:
356
  {prompt}
357
  """
 
10
  import networkx as nx
11
  from pyvis.network import Network
12
 
13
+
14
  client = chromadb.Client()
15
  collection_name = "BioModelsRAG"
16
 
17
  global db
18
  db = client.get_or_create_collection(name=collection_name)
19
 
20
+ #Todolists
21
+ #1. if the model ID starts with MODEL (cannot download), don't even include it (TICK)
22
+ #2. switch the order of model choosing and the groq api key so if they just want to visualize, that's fine (TICK)
23
+
24
+
25
  class BioModelFetcher:
26
  def __init__(self, github_owner="TheBobBob", github_repo_cache="BiomodelsCache", biomodels_json_db_path="src/cached_biomodels.json"):
27
  self.github_owner = github_owner
 
121
  def __init__(self, groq_api_key):
122
  self.groq_client = Groq(api_key=groq_api_key)
123
 
124
+ def split_biomodels(self, antimony_file_path, models):
125
  text_splitter = CharacterTextSplitter(
126
  separator=" // ",
127
  chunk_size=1000,
 
130
  is_separator_regex=False,
131
  )
132
 
133
+ directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
134
+
135
+ files = os.listdir(directory_path)
136
+ for file in files:
137
+ file_path = os.path.join(directory_path, file)
138
+ try:
139
+ with open(file_path, 'r') as f:
140
+ file_content = f.read()
141
+ items = text_splitter.create_documents([file_content])
142
+ self.create_vector_db(items, models)
143
+ break
144
+ except Exception as e:
145
+ print(f"Error reading file {file_path}: {e}")
146
+
147
  return db
148
 
149
+ def create_vector_db(self, final_items, models):
150
  counter = 0
151
+ for model_id in models:
152
+ try:
153
+ results = db.get(where={"document": {"$eq": model_id}})
154
+
155
+ #might be a problem here?
156
+ if results['documents']:
157
+ continue
158
+
159
+ #could also be a problem in how the IDs are created
160
  for item in final_items:
161
  counter += 1 # Increment counter for each item
162
  item_id = f"{counter}_{model_id}"
 
168
  2. Maintain all original values and include any mathematical expressions or values in full.
169
  3. Ensure that all variable names and their values are clearly presented.
170
  4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
 
171
  Segment of Antimony: {item}
172
  """
173
 
 
187
  )
188
  else:
189
  print(f"Error: No content returned from Groq for model {model_id}.")
190
+ except Exception as e:
191
+ print(f"Error processing model {model_id}: {e}")
192
 
193
 
194
  class SBMLNetworkVisualizer:
 
286
  if models:
287
  model_ids = list(models.keys())
288
  model_ids = [model_id for model_id in model_ids if not str(model_id).startswith("MODEL")]
 
289
  if models:
290
  selected_models = st.multiselect(
291
  "Select biomodels to analyze",
 
303
 
304
  net = self.visualizer.sbml_to_network(model_file_path)
305
 
306
+ st.subheader(f"Model: {model_data['title']}")
307
  net.show(f"sbml_network_{model_id}.html")
308
 
309
  HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
 
324
  antimony_file_path = model_file_path.replace(".xml", ".txt")
325
 
326
  AntimonyConverter.convert_sbml_to_antimony(model_file_path, antimony_file_path)
327
+ self.splitter.split_biomodels(antimony_file_path, selected_models)
328
 
329
  st.info(f"Model {model_id} {model_data['name']} has successfully been added to the database! :) ")
330
 
 
356
  flat_recommendation = [item for sublist in best_recommendation for item in (sublist if isinstance(sublist, list) else [sublist])]
357
  query_results_final += "\n\n".join(flat_recommendation) + "\n\n"
358
 
359
+
360
  prompt_template = f"""
361
  Using the context and previous conversation provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:
 
362
  Context:
363
  {query_results_final}
 
364
  Previous Conversation:
365
  {history}
 
366
  Instructions:
367
  1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
368
  2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
369
  3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
 
370
  Question:
371
  {prompt}
372
  """