TheBobBob committed on
Commit
62f5a05
·
verified ·
1 Parent(s): 445908a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +251 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import tellurium as te
4
+ import tempfile
5
+ import ollama
6
+ import streamlit as st
7
+ from langchain_text_splitters import CharacterTextSplitter
8
+ import chromadb
9
+
10
# Constants and global variables

# GitHub account and repository that host the cached BioModels metadata.
GITHUB_OWNER = "sys-bio"
GITHUB_REPO_CACHE = "BiomodelsCache"
# Path, inside that repository, of the JSON metadata file.
BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
# Temporary directory created at import time; downloaded model files are written here.
LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()

# Metadata returned by fetch_github_json(); filled lazily by search_models().
cached_data = None
# Collection returned by create_vector_db(); None until a collection is built.
db = None
18
+
19
def fetch_github_json(timeout=30):
    """Download the cached BioModels metadata JSON from GitHub.

    The GitHub contents API is queried first to obtain the file's
    ``download_url``; the JSON document is then fetched from that URL.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The parsed JSON document.

    Raises:
        ValueError: If either request does not return HTTP 200, or the
            contents API response carries no usable ``download_url``.
        requests.RequestException: On network failures or timeouts.
    """
    error_message = (
        f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}"
    )
    url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
    headers = {"Accept": "application/vnd.github+json"}

    # Without a timeout a stalled connection would block the app forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(error_message)

    data = response.json()
    # Treat a missing or null download_url the same way: there is nothing to fetch.
    file_url = data.get("download_url") if isinstance(data, dict) else None
    if not file_url:
        raise ValueError(error_message)

    json_response = requests.get(file_url, timeout=timeout)
    # An error page is not the model DB; fail here instead of inside .json().
    if json_response.status_code != 200:
        raise ValueError(error_message)
    return json_response.json()
34
+
35
def search_models(search_str):
    """Search the cached BioModels metadata for entries matching a query.

    The metadata is fetched with ``fetch_github_json()`` on first use and kept
    in the module-level ``cached_data``. The query is stripped, lower-cased and
    split on spaces; an entry matches when every query word is a substring of
    the lower-cased, space-joined string forms of all of the entry's values.
    Entries without a ``'name'`` key are skipped.

    Args:
        search_str: Free-text query.

    Returns:
        A dict mapping each matching model id to a dict with the keys
        ``'ID'``, ``'name'`` (lower-cased), ``'url'``, ``'id'``, ``'title'``
        and ``'authors'``. Empty when the query is blank or nothing matches.
    """
    global cached_data
    if cached_data is None:
        cached_data = fetch_github_json()

    # Drop the empty strings produced by repeated spaces; they match everything anyway.
    query_words = [word for word in search_str.strip().lower().split(" ") if word]
    models = {}
    if not query_words:
        return models

    for model_id, model_data in cached_data.items():
        if 'name' not in model_data:
            continue

        # Build the searchable text once per entry rather than once per query word.
        searchable_text = ' '.join(str(value).lower() for value in model_data.values())
        if not all(word in searchable_text for word in query_words):
            continue

        models[model_id] = {
            'ID': model_id,
            'name': model_data['name'].lower(),
            # .get() so an entry lacking an optional field does not abort the search.
            'url': model_data.get('url'),
            'id': model_data.get('model_id'),
            'title': model_data.get('title'),
            'authors': model_data.get('authors'),
        }

    return models
75
+
76
def download_model_file(model_url, model_id):
    """Download a model's SBML file into ``LOCAL_DOWNLOAD_DIR``.

    Args:
        model_url: Accepted for interface compatibility but not used; the
            download URL is always built from ``model_id``.
        model_id: Model identifier, used both in the download URL and as the
            local file name (``<model_id>.xml``).

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If the server does not answer with HTTP 200.
        requests.RequestException: On network failures or timeouts.
    """
    # Use a separate name instead of overwriting the parameter, so it is
    # obvious that the caller-supplied URL plays no part in the request.
    download_url = f"https://raw.githubusercontent.com/konankisa/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
    # Without a timeout a stalled connection would block the app forever.
    response = requests.get(download_url, timeout=30)

    if response.status_code != 200:
        raise ValueError(f"Failed to download the model from {download_url}")

    os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
    file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.xml")

    with open(file_path, 'wb') as file:
        file.write(response.content)

    print(f"Model {model_id} downloaded successfully: {file_path}")
    return file_path
91
+
92
def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
    """Convert an SBML file to Antimony text and write it to disk.

    Best effort: any exception raised while loading, converting or writing is
    caught and reported with ``print``; nothing is raised to the caller.

    Args:
        sbml_file_path: Path of the SBML file to load.
        antimony_file_path: Path the Antimony text is written to.
    """
    try:
        runner = te.loadSBMLModel(sbml_file_path)
        antimony_text = runner.getCurrentAntimony()

        with open(antimony_file_path, 'w') as handle:
            handle.write(antimony_text)

        print(f"Successfully converted SBML to Antimony: {antimony_file_path}")
    except Exception as err:
        print(f"Error converting SBML to Antimony: {err}")
104
+
105
def split_biomodels(antimony_file_path):
    """Split one Antimony file into text chunks.

    Reads exactly the file named by ``antimony_file_path``. (The previous
    implementation listed the containing directory and read whichever entry
    came first, which could be the SBML file or another model's file.)

    Args:
        antimony_file_path: Path of the Antimony file to split.

    Returns:
        The list of documents produced by ``CharacterTextSplitter``; empty
        when the file is missing or cannot be read.
    """
    final_items = []

    # A failed SBML conversion leaves no Antimony file behind; report and bail out.
    if not os.path.isfile(antimony_file_path):
        print(f"Antimony file not found: {antimony_file_path}")
        return final_items

    try:
        with open(antimony_file_path, 'r') as f:
            file_content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading file {antimony_file_path}: {e}")
        return final_items

    text_splitter = CharacterTextSplitter(
        separator=" // ",
        chunk_size=1000,
        chunk_overlap=20,
        length_function=len,
        is_separator_regex=False,
    )
    final_items.extend(text_splitter.create_documents([file_content]))
    return final_items
134
+
135
def create_vector_db(final_items):
    """Summarize each chunk with the LLM and store the summaries in Chroma.

    The collection is stored in the module-level ``db`` and also returned.
    Calling this function again reuses the ``BioModelsRAG`` collection and
    clears its previous entries, instead of failing because the collection
    name already exists.

    Args:
        final_items: Chunks to summarize. Items exposing ``page_content``
            contribute that text; any other item is formatted as-is.

    Returns:
        The Chroma collection holding one summary per item.
    """
    global db
    client = chromadb.Client()
    db = client.get_or_create_collection(
        name="BioModelsRAG",
        metadata={"hnsw:space": "cosine"},
    )

    # Remove summaries left over from an earlier call so ids never collide
    # and stale models do not leak into later answers.
    stale_ids = db.get()["ids"]
    if stale_ids:
        db.delete(ids=stale_ids)

    documents = []
    for item in final_items:
        # Send the chunk's text, not the wrapper object's repr, to the model.
        segment = getattr(item, "page_content", item)
        prompt = f"""
    Summarize the following segment of Antimony in a clear and concise manner:
    1. Provide a detailed summary using a limited number of words
    2. Maintain all original values and include any mathematical expressions or values in full.
    3. Ensure that all variable names and their values are clearly presented.
    4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.

    Here is the antimony segment to summarize: {segment}
    """
        generation = ollama.generate(model="llama3", prompt=prompt)
        documents.append(generation['response'])

    # Skip add() entirely when there is nothing to store.
    if documents:
        db.add(
            documents=documents,
            ids=[f"id{i}" for i in range(len(documents))],
        )
    return db
164
+
165
def generate_response(db, query_text, previous_context):
    """Answer a question using summaries retrieved from the collection.

    Args:
        db: Collection object exposing ``query(query_texts=..., n_results=...)``.
        query_text: The user's question.
        previous_context: Text of earlier responses, prepended to the
            retrieved summaries in the prompt.

    Returns:
        The model's answer, ``"No results found."`` when nothing was
        retrieved, or ``'No response generated'`` when the model reply has no
        ``'response'`` entry.
    """
    query_results = db.query(
        query_texts=query_text,
        n_results=5,
    )

    # 'documents' holds one list per query text, so an empty result looks like
    # [[]] and is truthy; flatten before deciding whether anything was found.
    document_groups = query_results.get('documents') or []
    retrieved = [str(doc) for group in document_groups for doc in (group or []) if doc]
    if not retrieved:
        return "No results found."

    # Plain text reads better in the prompt than a nested-list repr.
    best_recommendation = "\n".join(retrieved)

    prompt_template = f"""
    Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.

    Context:
    {previous_context} {best_recommendation}

    Instructions:
    1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
    2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
    3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.

    Question:
    {query_text}

    """
    response = ollama.generate(model="llama3", prompt=prompt_template)
    final_response = response.get('response', 'No response generated')
    return final_response
194
+
195
def streamlit_app():
    """Render the BioModels search, analysis and chat interface.

    Flow: search the cached metadata, let the user pick models, then on
    "Analyze Selected Models" download each model, convert it to Antimony,
    split it into chunks and build the vector collection. Once a collection
    exists, a question box is shown and answers are produced with
    ``generate_response``.

    Streamlit re-executes the script on every interaction, and ``st.button``
    is only true for the single run triggered by the click. The collection is
    therefore kept in ``st.session_state`` so the question box survives later
    reruns; the module-level ``db`` is still assigned as before.
    """
    global db

    st.title("BioModels Chat Interface")

    search_str = st.text_input("Enter search query:")
    if not search_str:
        return

    models = search_models(search_str)
    if not models:
        st.write("No models found for the given search query.")
        return

    model_ids = list(models.keys())
    selected_models = st.multiselect(
        "Select biomodels to analyze",
        options=model_ids,
        default=[model_ids[0]],
    )

    if st.button("Analyze Selected Models"):
        all_final_items = []
        for model_id in selected_models:
            model_data = models[model_id]

            st.write(f"Selected model: {model_data['name']}")

            try:
                model_file_path = download_model_file(model_data['url'], model_id)
            except (ValueError, requests.RequestException) as err:
                # One unavailable model should not abort the whole analysis.
                st.error(f"Could not download model {model_id}: {err}")
                continue

            # Swap only the extension; str.replace would also rewrite any
            # ".xml" occurring elsewhere in the path.
            antimony_file_path = os.path.splitext(model_file_path)[0] + ".antimony"

            convert_sbml_to_antimony(model_file_path, antimony_file_path)

            final_items = split_biomodels(antimony_file_path)
            if not final_items:
                st.write("No content found in the biomodel.")
                continue

            all_final_items.extend(final_items)

        db = create_vector_db(all_final_items)
        # Persist across reruns; the module global is reset on each rerun.
        st.session_state.db = db
        # Earlier answers referred to the previously analyzed models.
        st.session_state.previous_context = ""

        if db is not None:
            st.write("Models have been processed and added to the database.")

    # No collection has been built in this session yet: nothing to ask about.
    if "db" not in st.session_state or st.session_state.db is None:
        return

    user_query = st.text_input("Ask a question about the biomodels:")
    if not user_query:
        return

    if 'previous_context' not in st.session_state:
        st.session_state.previous_context = ""

    response = generate_response(
        st.session_state.db, user_query, st.session_state.previous_context
    )
    st.write(f"Response: {response}")

    st.session_state.previous_context += f"{response}\n"
249
+
250
# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    streamlit_app()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ requests==2.32.2
2
+ tellurium==2.2.10
3
+ ollama==0.3.2
4
+ streamlit==1.23.0
5
+ langchain-text-splitters==0.2.4
6
+ chromadb==0.5.5