TheBobBob committed on
Commit
35f8d42
·
verified ·
1 Parent(s): 2456d3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -46
app.py CHANGED
@@ -1,5 +1,4 @@
1
- #not being added to db properly, that is the problem
2
- import os
3
  import requests
4
  import tellurium as te
5
  import tempfile
@@ -7,15 +6,12 @@ import streamlit as st
7
  import chromadb
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
 
10
- # Constants and global variables
11
  GITHUB_OWNER = "TheBobBob"
12
  GITHUB_REPO_CACHE = "BiomodelsCache"
13
  BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
14
  LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
15
 
16
- cached_data = None
17
- db = None
18
-
19
  def fetch_github_json():
20
  url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
21
  headers = {"Accept": "application/vnd.github+json"}
@@ -32,11 +28,7 @@ def fetch_github_json():
32
  else:
33
  raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
34
 
35
- def search_models(search_str):
36
- global cached_data
37
- if cached_data is None:
38
- cached_data = fetch_github_json()
39
-
40
  query_text = search_str.strip().lower()
41
  models = {}
42
 
@@ -103,7 +95,6 @@ def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
103
  print(f"Error converting SBML to Antimony: {e}")
104
 
105
  def split_biomodels(antimony_file_path):
106
-
107
  text_splitter = RecursiveCharacterTextSplitter(
108
  chunk_size=1000,
109
  chunk_overlap=20,
@@ -124,19 +115,14 @@ def split_biomodels(antimony_file_path):
124
  with open(file_path, 'r') as f:
125
  file_content = f.read()
126
  items = text_splitter.create_documents([file_content])
127
- for item in items:
128
- item = str(item)
129
- final_items.append(item)
130
  break
131
  except Exception as e:
132
  print(f"Error reading file {file_path}: {e}")
133
 
134
  return final_items
135
-
136
- import chromadb
137
 
138
  def create_vector_db(final_items):
139
- global db
140
  client = chromadb.Client()
141
  collection_name = "BioModelsRAG"
142
  from chromadb.utils import embedding_functions
@@ -144,8 +130,6 @@ def create_vector_db(final_items):
144
 
145
  # Initialize the database
146
  db = client.get_or_create_collection(name=collection_name)
147
- if db is None:
148
- raise ValueError("Db not created!")
149
  documents_to_add = []
150
  ids_to_add = []
151
 
@@ -163,12 +147,7 @@ def create_vector_db(final_items):
163
  if db.get(item_id) is None: # If the ID does not exist
164
  prompt = f"""
165
  Summarize the following segment of Antimony in a clear and concise manner:
166
- 1. Provide a detailed summary using a limited number of words
167
- 2. Maintain all original values and include any mathematical expressions or values in full.
168
- 3. Ensure that all variable names and their values are clearly presented.
169
- 4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
170
-
171
- Here is the antimony segment to summarize: {item}
172
  """
173
 
174
  output = llm(
@@ -193,9 +172,6 @@ def create_vector_db(final_items):
193
  return db
194
 
195
  def generate_response(db, query_text, previous_context):
196
- if db is None:
197
- raise ValueError("Database not initialized.")
198
-
199
  query_results = db.query(
200
  query_texts=query_text,
201
  n_results=7,
@@ -204,21 +180,14 @@ def generate_response(db, query_text, previous_context):
204
  best_recommendation = query_results['documents']
205
 
206
  prompt_template = f"""
207
- Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
208
-
209
  Context:
210
  {previous_context} {best_recommendation}
211
 
212
- Instructions:
213
- 1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
214
- 2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
215
- 3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
216
-
217
  Question:
218
  {query_text}
219
- Once you are done summarizing, type 'END'.
220
  """
221
-
222
  from llama_cpp import Llama
223
 
224
  llm = Llama.from_pretrained(
@@ -245,13 +214,13 @@ def generate_response(db, query_text, previous_context):
245
  return full_response
246
 
247
  def streamlit_app():
248
- global db
249
  st.title("BioModelsRAG")
250
 
251
  search_str = st.text_input("Enter search query:")
252
 
253
  if search_str:
254
- models = search_models(search_str)
 
255
 
256
  if models:
257
  model_ids = list(models.keys())
@@ -267,26 +236,22 @@ def streamlit_app():
267
  model_data = models[model_id]
268
 
269
  st.write(f"Selected model: {model_data['name']}")
270
-
271
  model_url = model_data['url']
272
  model_file_path = download_model_file(model_url, model_id)
273
  antimony_file_path = model_file_path.replace(".xml", ".antimony")
274
 
275
  convert_sbml_to_antimony(model_file_path, antimony_file_path)
276
-
277
- # Ensure this returns items and not an empty list
278
  final_items.extend(split_biomodels(antimony_file_path))
279
 
280
- # Ensure final_items is not empty before creating the database
281
  if final_items:
282
  db = create_vector_db(final_items)
283
  st.write("Models have been processed and added to the database.")
284
  else:
285
  st.error("No items found in the models. Check if the Antimony files were generated correctly.")
286
 
287
- st.write("Models have processed and written to the database.")
288
 
289
-
290
  # Avoid caching the database initialization, or ensure it's properly updated.
291
  @st.cache_resource
292
  def get_messages():
 
1
+ import os
 
2
  import requests
3
  import tellurium as te
4
  import tempfile
 
6
  import chromadb
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
8
 
9
+ # Constants
10
  GITHUB_OWNER = "TheBobBob"
11
  GITHUB_REPO_CACHE = "BiomodelsCache"
12
  BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
13
  LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
14
 
 
 
 
15
  def fetch_github_json():
16
  url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
17
  headers = {"Accept": "application/vnd.github+json"}
 
28
  else:
29
  raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
30
 
31
+ def search_models(search_str, cached_data):
 
 
 
 
32
  query_text = search_str.strip().lower()
33
  models = {}
34
 
 
95
  print(f"Error converting SBML to Antimony: {e}")
96
 
97
  def split_biomodels(antimony_file_path):
 
98
  text_splitter = RecursiveCharacterTextSplitter(
99
  chunk_size=1000,
100
  chunk_overlap=20,
 
115
  with open(file_path, 'r') as f:
116
  file_content = f.read()
117
  items = text_splitter.create_documents([file_content])
118
+ final_items.extend(items)
 
 
119
  break
120
  except Exception as e:
121
  print(f"Error reading file {file_path}: {e}")
122
 
123
  return final_items
 
 
124
 
125
  def create_vector_db(final_items):
 
126
  client = chromadb.Client()
127
  collection_name = "BioModelsRAG"
128
  from chromadb.utils import embedding_functions
 
130
 
131
  # Initialize the database
132
  db = client.get_or_create_collection(name=collection_name)
 
 
133
  documents_to_add = []
134
  ids_to_add = []
135
 
 
147
  if db.get(item_id) is None: # If the ID does not exist
148
  prompt = f"""
149
  Summarize the following segment of Antimony in a clear and concise manner:
150
+ {item}
 
 
 
 
 
151
  """
152
 
153
  output = llm(
 
172
  return db
173
 
174
  def generate_response(db, query_text, previous_context):
 
 
 
175
  query_results = db.query(
176
  query_texts=query_text,
177
  n_results=7,
 
180
  best_recommendation = query_results['documents']
181
 
182
  prompt_template = f"""
183
+ Using the context provided below, answer the following question:
 
184
  Context:
185
  {previous_context} {best_recommendation}
186
 
 
 
 
 
 
187
  Question:
188
  {query_text}
 
189
  """
190
+
191
  from llama_cpp import Llama
192
 
193
  llm = Llama.from_pretrained(
 
214
  return full_response
215
 
216
  def streamlit_app():
 
217
  st.title("BioModelsRAG")
218
 
219
  search_str = st.text_input("Enter search query:")
220
 
221
  if search_str:
222
+ cached_data = fetch_github_json()
223
+ models = search_models(search_str, cached_data)
224
 
225
  if models:
226
  model_ids = list(models.keys())
 
236
  model_data = models[model_id]
237
 
238
  st.write(f"Selected model: {model_data['name']}")
239
+
240
  model_url = model_data['url']
241
  model_file_path = download_model_file(model_url, model_id)
242
  antimony_file_path = model_file_path.replace(".xml", ".antimony")
243
 
244
  convert_sbml_to_antimony(model_file_path, antimony_file_path)
 
 
245
  final_items.extend(split_biomodels(antimony_file_path))
246
 
 
247
  if final_items:
248
  db = create_vector_db(final_items)
249
  st.write("Models have been processed and added to the database.")
250
  else:
251
  st.error("No items found in the models. Check if the Antimony files were generated correctly.")
252
 
253
+ st.write("Models have been processed and written to the database.")
254
 
 
255
  # Avoid caching the database initialization, or ensure it's properly updated.
256
  @st.cache_resource
257
  def get_messages():