Spaces:

TheBobBob
/

BioModelsRAG-Website_streamlit

Running

App Files Community

TheBobBob committed on Dec 23, 2024

Commit

22ab655

verified ·

1 Parent(s): f226547

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -35

app.py CHANGED Viewed

@@ -10,17 +10,12 @@ import libsbml
 import networkx as nx
 from pyvis.network import Network
 client = chromadb.Client()
 collection_name = "BioModelsRAG"
 global db
 db = client.get_or_create_collection(name=collection_name)
-#Todolists
-#1. if MODEL (cannot download) don't even include (TICK)
-#2. switch the choosing and groq api key so if they just want to visualize thats fine (TICK)
 class BioModelFetcher:
     def __init__(self, github_owner="TheBobBob", github_repo_cache="BiomodelsCache", biomodels_json_db_path="src/cached_biomodels.json"):
@@ -121,7 +116,7 @@ class BioModelSplitter:
     def __init__(self, groq_api_key):
         self.groq_client = Groq(api_key=groq_api_key)
-    def split_biomodels(self, antimony_file_path, models):
         text_splitter = CharacterTextSplitter(
             separator="  // ",
             chunk_size=1000,
@@ -130,33 +125,19 @@ class BioModelSplitter:
             is_separator_regex=False,
         )
-        directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
-        files = os.listdir(directory_path)
-        for file in files:
-            file_path = os.path.join(directory_path, file)
-            try:
-                with open(file_path, 'r') as f:
-                    file_content = f.read()
-                    items = text_splitter.create_documents([file_content])
-                    self.create_vector_db(items, models)
-                    break
-            except Exception as e:
-                print(f"Error reading file {file_path}: {e}")
         return db
-    def create_vector_db(self, final_items, models):
         counter = 0
-        for model_id in models:
-            try:
-                results = db.get(where={"document": {"$eq": model_id}})
-                #might be a problem here?
-                if results['documents']:
-                    continue
-                #could also be a problem in how the IDs are created
                 for item in final_items:
                     counter += 1  # Increment counter for each item
                     item_id = f"{counter}_{model_id}"
@@ -168,6 +149,7 @@ class BioModelSplitter:
                     2. Maintain all original values and include any mathematical expressions or values in full.
                     3. Ensure that all variable names and their values are clearly presented.
                     4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
                     Segment of Antimony: {item}
                     """
@@ -185,10 +167,11 @@ class BioModelSplitter:
                             metadatas=[{"document": model_id}],
                             documents=[chat_completion.choices[0].message.content],
                         )
                     else:
                         print(f"Error: No content returned from Groq for model {model_id}.")
-            except Exception as e:
-                print(f"Error processing model {model_id}: {e}")
 class SBMLNetworkVisualizer:
@@ -286,6 +269,7 @@ class StreamlitApp:
             if models:
                 model_ids = list(models.keys())
                 model_ids = [model_id for model_id in model_ids if not str(model_id).startswith("MODEL")]
                 if models:
                     selected_models = st.multiselect(
                         "Select biomodels to analyze",
@@ -303,7 +287,7 @@ class StreamlitApp:
                         net = self.visualizer.sbml_to_network(model_file_path)
-                        st.subheader(f"Model: {model_data['title']}")
                         net.show(f"sbml_network_{model_id}.html")
                         HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
@@ -324,7 +308,7 @@ class StreamlitApp:
                         antimony_file_path = model_file_path.replace(".xml", ".txt")
                         AntimonyConverter.convert_sbml_to_antimony(model_file_path, antimony_file_path)
-                        self.splitter.split_biomodels(antimony_file_path, selected_models)
                         st.info(f"Model {model_id} {model_data['name']} has successfully been added to the database! :) ")
@@ -352,21 +336,25 @@ class StreamlitApp:
                 n_results=5,
                 where={"document": {"$eq": model_id}},
             )
             best_recommendation = query_results['documents']
             flat_recommendation = [item for sublist in best_recommendation for item in (sublist if isinstance(sublist, list) else [sublist])]
             query_results_final += "\n\n".join(flat_recommendation) + "\n\n"
         prompt_template = f"""
         Using the context and previous conversation provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:
         Context:
         {query_results_final}
         Previous Conversation:
         {history}
         Instructions:
         1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
         2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
         3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
         Question:
         {prompt}
         """

 import networkx as nx
 from pyvis.network import Network
 client = chromadb.Client()
 collection_name = "BioModelsRAG"
 global db
 db = client.get_or_create_collection(name=collection_name)
 class BioModelFetcher:
     def __init__(self, github_owner="TheBobBob", github_repo_cache="BiomodelsCache", biomodels_json_db_path="src/cached_biomodels.json"):
     def __init__(self, groq_api_key):
         self.groq_client = Groq(api_key=groq_api_key)
+    def split_biomodels(self, antimony_file_path, models, model_id):
         text_splitter = CharacterTextSplitter(
             separator="  // ",
             chunk_size=1000,
             is_separator_regex=False,
         )
+        with open(antimony_file_path) as f:
+            file_content = f.read()
+        items = text_splitter.create_documents([file_content])
+        self.create_vector_db(items, model_id)
         return db
+    def create_vector_db(self, final_items, model_id):
         counter = 0
+        try:
+            results = db.get(where={"document": model_id})
+            chromadb.api.client.SharedSystemClient.clear_system_cache()
+            if len(results['documents']) == 0:
                 for item in final_items:
                     counter += 1  # Increment counter for each item
                     item_id = f"{counter}_{model_id}"
                     2. Maintain all original values and include any mathematical expressions or values in full.
                     3. Ensure that all variable names and their values are clearly presented.
                     4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
                     Segment of Antimony: {item}
                     """
                             metadatas=[{"document": model_id}],
                             documents=[chat_completion.choices[0].message.content],
                         )
+                        chromadb.api.client.SharedSystemClient.clear_system_cache()
                     else:
                         print(f"Error: No content returned from Groq for model {model_id}.")
+        except Exception as e:
+            print(f"Error processing model {model_id}: {e}")
 class SBMLNetworkVisualizer:
             if models:
                 model_ids = list(models.keys())
                 model_ids = [model_id for model_id in model_ids if not str(model_id).startswith("MODEL")]
                 if models:
                     selected_models = st.multiselect(
                         "Select biomodels to analyze",
                         net = self.visualizer.sbml_to_network(model_file_path)
+                        st.subheader(f"Model {model_data['title']}")
                         net.show(f"sbml_network_{model_id}.html")
                         HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
                         antimony_file_path = model_file_path.replace(".xml", ".txt")
                         AntimonyConverter.convert_sbml_to_antimony(model_file_path, antimony_file_path)
+                        self.splitter.split_biomodels(antimony_file_path, selected_models, model_id)
                         st.info(f"Model {model_id} {model_data['name']} has successfully been added to the database! :) ")
                 n_results=5,
                 where={"document": {"$eq": model_id}},
             )
+            chromadb.api.client.SharedSystemClient.clear_system_cache()
             best_recommendation = query_results['documents']
             flat_recommendation = [item for sublist in best_recommendation for item in (sublist if isinstance(sublist, list) else [sublist])]
             query_results_final += "\n\n".join(flat_recommendation) + "\n\n"
         prompt_template = f"""
         Using the context and previous conversation provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:
         Context:
         {query_results_final}
         Previous Conversation:
         {history}
         Instructions:
         1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
         2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
         3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
         Question:
         {prompt}
         """