TheBobBob committed
Commit ed808e5 · verified · 1 Parent(s): 62f5a05

Update app.py

Files changed (1)
  1. app.py +239 -251
app.py CHANGED
@@ -1,251 +1,239 @@
- import os
- import requests
- import tellurium as te
- import tempfile
- import ollama
- import streamlit as st
- from langchain_text_splitters import CharacterTextSplitter
- import chromadb
-
- # Constants and global variables
- GITHUB_OWNER = "sys-bio"
- GITHUB_REPO_CACHE = "BiomodelsCache"
- BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
- LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
-
- cached_data = None
- db = None
-
- def fetch_github_json():
-     url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
-     headers = {"Accept": "application/vnd.github+json"}
-     response = requests.get(url, headers=headers)
-
-     if response.status_code == 200:
-         data = response.json()
-         if "download_url" in data:
-             file_url = data["download_url"]
-             json_response = requests.get(file_url)
-             return json_response.json()
-         else:
-             raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
-     else:
-         raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
-
- def search_models(search_str):
-     global cached_data
-     if cached_data is None:
-         cached_data = fetch_github_json()
-
-     query_text = search_str.strip().lower()
-     models = {}
-
-     for model_id, model_data in cached_data.items():
-         if 'name' in model_data:
-             name = model_data['name'].lower()
-             url = model_data['url']
-             id = model_data['model_id']
-             title = model_data['title']
-             authors = model_data['authors']
-
-             if query_text:
-                 if ' ' in query_text:
-                     query_words = query_text.split(" ")
-                     if all(word in ' '.join([str(v).lower() for v in model_data.values()]) for word in query_words):
-                         models[model_id] = {
-                             'ID': model_id,
-                             'name': name,
-                             'url': url,
-                             'id': id,
-                             'title': title,
-                             'authors': authors,
-                         }
-                 else:
-                     if query_text in ' '.join([str(v).lower() for v in model_data.values()]):
-                         models[model_id] = {
-                             'ID': model_id,
-                             'name': name,
-                             'url': url,
-                             'id': id,
-                             'title': title,
-                             'authors': authors,
-                         }
-
-     return models
-
- def download_model_file(model_url, model_id):
-     model_url = f"https://raw.githubusercontent.com/konankisa/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
-     response = requests.get(model_url)
-
-     if response.status_code == 200:
-         os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
-         file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.xml")
-
-         with open(file_path, 'wb') as file:
-             file.write(response.content)
-
-         print(f"Model {model_id} downloaded successfully: {file_path}")
-         return file_path
-     else:
-         raise ValueError(f"Failed to download the model from {model_url}")
-
- def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
-     try:
-         r = te.loadSBMLModel(sbml_file_path)
-         antimony_str = r.getCurrentAntimony()
-
-         with open(antimony_file_path, 'w') as file:
-             file.write(antimony_str)
-
-         print(f"Successfully converted SBML to Antimony: {antimony_file_path}")
-
-     except Exception as e:
-         print(f"Error converting SBML to Antimony: {e}")
-
- def split_biomodels(antimony_file_path):
-     text_splitter = CharacterTextSplitter(
-         separator=" // ",
-         chunk_size=1000,
-         chunk_overlap=20,
-         length_function=len,
-         is_separator_regex=False
-     )
-
-     final_items = []
-     directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
-     if not os.path.isdir(directory_path):
-         print(f"Directory not found: {directory_path}")
-         return final_items
-
-     files = os.listdir(directory_path)
-     for file in files:
-         file_path = os.path.join(directory_path, file)
-         try:
-             with open(file_path, 'r') as f:
-                 file_content = f.read()
-                 items = text_splitter.create_documents([file_content])
-                 for item in items:
-                     final_items.append(item)
-                 break
-         except Exception as e:
-             print(f"Error reading file {file_path}: {e}")
-
-     return final_items
-
- def create_vector_db(final_items):
-     global db
-     client = chromadb.Client()
-     db = client.create_collection(
-         name="BioModelsRAG",
-         metadata={"hnsw:space": "cosine"}
-     )
-     documents = []
-
-     for item in final_items:
-         prompt = f"""
-         Summarize the following segment of Antimony in a clear and concise manner:
-         1. Provide a detailed summary using a limited number of words
-         2. Maintain all original values and include any mathematical expressions or values in full.
-         3. Ensure that all variable names and their values are clearly presented.
-         4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
-
-         Here is the antimony segment to summarize: {item}
-         """
-         documents5 = ollama.generate(model="llama3", prompt=prompt)
-         documents2 = documents5['response']
-         documents.append(documents2)
-
-     if final_items:
-         db.add(
-             documents=documents,
-             ids=[f"id{i}" for i in range(len(final_items))]
-         )
-     return db
-
- def generate_response(db, query_text, previous_context):
-     query_results = db.query(
-         query_texts=query_text,
-         n_results=5,
-     )
-
-     if not query_results.get('documents'):
-         return "No results found."
-
-     best_recommendation = query_results['documents']
-
-     prompt_template = f"""
-     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
-
-     Context:
-     {previous_context} {best_recommendation}
-
-     Instructions:
-     1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
-     2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
-     3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
-
-     Question:
-     {query_text}
-
-     """
-     response = ollama.generate(model="llama3", prompt=prompt_template)
-     final_response = response.get('response', 'No response generated')
-     return final_response
-
- def streamlit_app():
-     st.title("BioModels Chat Interface")
-
-     search_str = st.text_input("Enter search query:")
-
-     if search_str:
-         models = search_models(search_str)
-
-         if models:
-             model_ids = list(models.keys())
-             selected_models = st.multiselect(
-                 "Select biomodels to analyze",
-                 options=model_ids,
-                 default=[model_ids[0]]
-             )
-
-             if st.button("Analyze Selected Models"):
-                 all_final_items = []
-                 for model_id in selected_models:
-                     model_data = models[model_id]
-
-                     st.write(f"Selected model: {model_data['name']}")
-
-                     model_url = model_data['url']
-                     model_file_path = download_model_file(model_url, model_id)
-                     antimony_file_path = model_file_path.replace(".xml", ".antimony")
-
-                     convert_sbml_to_antimony(model_file_path, antimony_file_path)
-
-                     final_items = split_biomodels(antimony_file_path)
-                     if not final_items:
-                         st.write("No content found in the biomodel.")
-                         continue
-
-                     all_final_items.extend(final_items)
-
-                 global db
-                 db = create_vector_db(all_final_items)
-
-                 if db:
-                     st.write("Models have been processed and added to the database.")
-
-                     user_query = st.text_input("Ask a question about the biomodels:")
-
-                     if user_query:
-                         if 'previous_context' not in st.session_state:
-                             st.session_state.previous_context = ""
-
-                         response = generate_response(db, user_query, st.session_state.previous_context)
-                         st.write(f"Response: {response}")
-
-                         st.session_state.previous_context += f"{response}\n"
-         else:
-             st.write("No models found for the given search query.")
-
- if __name__ == "__main__":
-     streamlit_app()
 
+ import os
+ import requests
+ import tellurium as te
+ import tempfile
+ import streamlit as st
+ from langchain_text_splitters import CharacterTextSplitter
+ from transformers import pipeline
+ import chromadb
+
+ # Constants and global variables
+ GITHUB_OWNER = "sys-bio"
+ GITHUB_REPO_CACHE = "BiomodelsCache"
+ BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
+ LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
+
+ cached_data = None
+ db = None
+
+ # Initialize Hugging Face model pipelines
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ llm = pipeline("text-generation", model="gpt2")
+
+ def fetch_github_json():
+     url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
+     headers = {"Accept": "application/vnd.github+json"}
+     response = requests.get(url, headers=headers)
+
+     if response.status_code == 200:
+         data = response.json()
+         if "download_url" in data:
+             file_url = data["download_url"]
+             json_response = requests.get(file_url)
+             return json_response.json()
+         else:
+             raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
+     else:
+         raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
+
+ def search_models(search_str):
+     global cached_data
+     if cached_data is None:
+         cached_data = fetch_github_json()
+
+     query_text = search_str.strip().lower()
+     models = {}
+
+     for model_id, model_data in cached_data.items():
+         if 'name' in model_data:
+             name = model_data['name'].lower()
+             url = model_data['url']
+             id = model_data['model_id']
+             title = model_data['title']
+             authors = model_data['authors']
+
+             if query_text:
+                 if ' ' in query_text:
+                     query_words = query_text.split(" ")
+                     if all(word in ' '.join([str(v).lower() for v in model_data.values()]) for word in query_words):
+                         models[model_id] = {
+                             'ID': model_id,
+                             'name': name,
+                             'url': url,
+                             'id': id,
+                             'title': title,
+                             'authors': authors,
+                         }
+                 else:
+                     if query_text in ' '.join([str(v).lower() for v in model_data.values()]):
+                         models[model_id] = {
+                             'ID': model_id,
+                             'name': name,
+                             'url': url,
+                             'id': id,
+                             'title': title,
+                             'authors': authors,
+                         }
+
+     return models
+
+ def download_model_file(model_url, model_id):
+     model_url = f"https://raw.githubusercontent.com/konankisa/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
+     response = requests.get(model_url)
+
+     if response.status_code == 200:
+         os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
+         file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.xml")
+
+         with open(file_path, 'wb') as file:
+             file.write(response.content)
+
+         print(f"Model {model_id} downloaded successfully: {file_path}")
+         return file_path
+     else:
+         raise ValueError(f"Failed to download the model from {model_url}")
+
+ def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
+     try:
+         r = te.loadSBMLModel(sbml_file_path)
+         antimony_str = r.getCurrentAntimony()
+
+         with open(antimony_file_path, 'w') as file:
+             file.write(antimony_str)
+
+         print(f"Successfully converted SBML to Antimony: {antimony_file_path}")
+
+     except Exception as e:
+         print(f"Error converting SBML to Antimony: {e}")
+
+ def split_biomodels(antimony_file_path):
+     text_splitter = CharacterTextSplitter(
+         separator=" // ",
+         chunk_size=1000,
+         chunk_overlap=20,
+         length_function=len,
+         is_separator_regex=False
+     )
+
+     final_items = []
+     directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
+     if not os.path.isdir(directory_path):
+         print(f"Directory not found: {directory_path}")
+         return final_items
+
+     files = os.listdir(directory_path)
+     for file in files:
+         file_path = os.path.join(directory_path, file)
+         try:
+             with open(file_path, 'r') as f:
+                 file_content = f.read()
+                 items = text_splitter.create_documents([file_content])
+                 for item in items:
+                     final_items.append(item)
+                 break
+         except Exception as e:
+             print(f"Error reading file {file_path}: {e}")
+
+     return final_items
+
+ def create_vector_db(final_items):
+     global db
+     client = chromadb.Client()
+     db = client.create_collection(
+         name="BioModelsRAG",
+         metadata={"hnsw:space": "cosine"}
+     )
+     documents = []
+     print("VectorDB successfully created.")
+     for item in final_items:
+         prompt = f"""
+         Summarize the following segment of Antimony:
+         {item}
+         """
+         response = summarizer(prompt, max_length=150, min_length=30, do_sample=False)
+         summary = response[0]['summary_text']
+         documents.append(summary)
+
+     if final_items:
+         db.add(
+             documents=documents,
+             ids=[f"id{i}" for i in range(len(final_items))]
+         )
+     return db
+
+ def generate_response(db, query_text, previous_context):
+     query_results = db.query(
+         query_texts=query_text,
+         n_results=5,
+     )
+
+     if not query_results.get('documents'):
+         return "No results found."
+
+     best_recommendation = query_results['documents'][0]
+
+     prompt_template = f"""
+     Using the context below, answer the following question: {query_text}
+     Context: {previous_context} {best_recommendation}
+     """
+     response = llm(prompt_template, max_length=150)
+     final_response = response[0]['generated_text']
+     return final_response
+
+ def streamlit_app():
+     st.title("BioModels Chat Interface")
+
+     search_str = st.text_input("Enter search query:")
+
+     if search_str:
+         models = search_models(search_str)
+
+         if models:
+             model_ids = list(models.keys())
+             selected_models = st.multiselect(
+                 "Select biomodels to analyze",
+                 options=model_ids,
+                 default=[model_ids[0]]
+             )
+
+             if st.button("Analyze Selected Models"):
+                 all_final_items = []
+                 for model_id in selected_models:
+                     model_data = models[model_id]
+
+                     st.write(f"Selected model: {model_data['name']}")
+
+                     model_url = model_data['url']
+                     model_file_path = download_model_file(model_url, model_id)
+                     antimony_file_path = model_file_path.replace(".xml", ".antimony")
+
+                     convert_sbml_to_antimony(model_file_path, antimony_file_path)
+
+                     final_items = split_biomodels(antimony_file_path)
+                     if not final_items:
+                         st.write("No content found in the biomodel.")
+                         continue
+
+                     all_final_items.extend(final_items)
+
+                 global db
+                 db = create_vector_db(all_final_items)
+
+                 if db:
+                     st.write("Models have been processed and added to the database.")
+
+                     user_query = st.text_input("Ask a question about the biomodels:")
+
+                     if user_query:
+                         if 'previous_context' not in st.session_state:
+                             st.session_state.previous_context = ""
+
+                         response = generate_response(db, user_query, st.session_state.previous_context)
+                         st.write(f"Response: {response}")
+
+                         st.session_state.previous_context += f"{response}\n"
+         else:
+             st.write("No models found for the given search query.")
+
+ if __name__ == "__main__":
+     streamlit_app()
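
For reference, below is a minimal standalone sketch of the two Hugging Face pipeline calls the updated app.py relies on (summarization with facebook/bart-large-cnn in create_vector_db, text generation with gpt2 in generate_response). It assumes transformers with a PyTorch backend is installed; the Antimony fragment and the question are hypothetical placeholders, not part of the commit.

from transformers import pipeline

# Same two pipelines instantiated at module level in the new app.py
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
llm = pipeline("text-generation", model="gpt2")

# Hypothetical Antimony-like fragment standing in for one text-splitter chunk
segment = "J0: S1 -> S2; k1*S1; k1 = 0.5; S1 = 10; S2 = 0"

# Summarization pipelines return a list of dicts with a 'summary_text' key
summary = summarizer(
    f"Summarize the following segment of Antimony: {segment}",
    max_length=150, min_length=30, do_sample=False,
)[0]["summary_text"]

# Text-generation pipelines return a list of dicts with a 'generated_text' key
# (the prompt followed by the model's continuation)
answer = llm(
    f"Using the context below, answer the following question: What is k1?\nContext: {summary}",
    max_length=150,
)[0]["generated_text"]

print(summary)
print(answer)

Run locally, the app itself is still started the same way as before, with streamlit run app.py.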