Spaces:

TheBobBob
/

BioModelsRAG-Website_streamlit

Running

App Files Community

TheBobBob committed on Sep 23, 2024

Commit

35f8d42

verified ·

1 Parent(s): 2456d3a

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -46

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
-#not being added to db properly, that is the problem
-import os
 import requests
 import tellurium as te
 import tempfile
@@ -7,15 +6,12 @@ import streamlit as st
 import chromadb
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-# Constants and global variables
 GITHUB_OWNER = "TheBobBob"
 GITHUB_REPO_CACHE = "BiomodelsCache"
 BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
 LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
-cached_data = None
-db = None
 def fetch_github_json():
     url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
     headers = {"Accept": "application/vnd.github+json"}
@@ -32,11 +28,7 @@ def fetch_github_json():
     else:
         raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
-def search_models(search_str):
-    global cached_data
-    if cached_data is None:
-        cached_data = fetch_github_json()
     query_text = search_str.strip().lower()
     models = {}
@@ -103,7 +95,6 @@ def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
         print(f"Error converting SBML to Antimony: {e}")
 def split_biomodels(antimony_file_path):
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=20,
@@ -124,19 +115,14 @@ def split_biomodels(antimony_file_path):
             with open(file_path, 'r') as f:
                 file_content = f.read()
                 items = text_splitter.create_documents([file_content])
-                for item in items:
-                    item = str(item)
-                    final_items.append(item)
                 break
         except Exception as e:
             print(f"Error reading file {file_path}: {e}")
     return final_items
-import chromadb
 def create_vector_db(final_items):
-    global db
     client = chromadb.Client()
     collection_name = "BioModelsRAG"
     from chromadb.utils import embedding_functions
@@ -144,8 +130,6 @@ def create_vector_db(final_items):
     # Initialize the database
     db = client.get_or_create_collection(name=collection_name)
-    if db is None:
-        raise ValueError("Db not created!")
     documents_to_add = []
     ids_to_add = []
@@ -163,12 +147,7 @@ def create_vector_db(final_items):
         if db.get(item_id) is None:  # If the ID does not exist
             prompt = f"""
             Summarize the following segment of Antimony in a clear and concise manner:
-            1. Provide a detailed summary using a limited number of words
-            2. Maintain all original values and include any mathematical expressions or values in full.
-            3. Ensure that all variable names and their values are clearly presented.
-            4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
-            Here is the antimony segment to summarize: {item}
             """
             output = llm(
@@ -193,9 +172,6 @@ def create_vector_db(final_items):
     return db
 def generate_response(db, query_text, previous_context):
-    if db is None:
-        raise ValueError("Database not initialized.")
     query_results = db.query(
         query_texts=query_text,
         n_results=7,
@@ -204,21 +180,14 @@ def generate_response(db, query_text, previous_context):
     best_recommendation = query_results['documents']
     prompt_template = f"""
-    Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
     Context:
     {previous_context} {best_recommendation}
-    Instructions:
-    1. Cross-Reference: Use all provided context to define variables and identify any unknown entities.
-    2. Mathematical Calculations: Perform any necessary calculations based on the context and available data.
-    3. Consistency: Remember and incorporate previous responses if the question is related to earlier information.
     Question:
     {query_text}
-    Once you are done summarizing, type 'END'.
     """
     from llama_cpp import Llama
     llm = Llama.from_pretrained(
@@ -245,13 +214,13 @@ def generate_response(db, query_text, previous_context):
     return full_response
 def streamlit_app():
-    global db
     st.title("BioModelsRAG")
     search_str = st.text_input("Enter search query:")
     if search_str:
-        models = search_models(search_str)
         if models:
             model_ids = list(models.keys())
@@ -267,26 +236,22 @@ def streamlit_app():
                     model_data = models[model_id]
                     st.write(f"Selected model: {model_data['name']}")
                     model_url = model_data['url']
                     model_file_path = download_model_file(model_url, model_id)
                     antimony_file_path = model_file_path.replace(".xml", ".antimony")
                     convert_sbml_to_antimony(model_file_path, antimony_file_path)
-                    # Ensure this returns items and not an empty list
                     final_items.extend(split_biomodels(antimony_file_path))
-                # Ensure final_items is not empty before creating the database
                 if final_items:
                     db = create_vector_db(final_items)
                     st.write("Models have been processed and added to the database.")
                 else:
                     st.error("No items found in the models. Check if the Antimony files were generated correctly.")
-                st.write("Models have processed and written to the database.")
     # Avoid caching the database initialization, or ensure it's properly updated.
     @st.cache_resource
     def get_messages():

+import os
 import requests
 import tellurium as te
 import tempfile
 import chromadb
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+# Constants
 GITHUB_OWNER = "TheBobBob"
 GITHUB_REPO_CACHE = "BiomodelsCache"
 BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
 LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
 def fetch_github_json():
     url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_CACHE}/contents/{BIOMODELS_JSON_DB_PATH}"
     headers = {"Accept": "application/vnd.github+json"}
     else:
         raise ValueError(f"Unable to fetch model DB from GitHub repository: {GITHUB_OWNER} - {GITHUB_REPO_CACHE}")
+def search_models(search_str, cached_data):
     query_text = search_str.strip().lower()
     models = {}
         print(f"Error converting SBML to Antimony: {e}")
 def split_biomodels(antimony_file_path):
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=20,
             with open(file_path, 'r') as f:
                 file_content = f.read()
                 items = text_splitter.create_documents([file_content])
+                final_items.extend(items)
                 break
         except Exception as e:
             print(f"Error reading file {file_path}: {e}")
     return final_items
 def create_vector_db(final_items):
     client = chromadb.Client()
     collection_name = "BioModelsRAG"
     from chromadb.utils import embedding_functions
     # Initialize the database
     db = client.get_or_create_collection(name=collection_name)
     documents_to_add = []
     ids_to_add = []
         if db.get(item_id) is None:  # If the ID does not exist
             prompt = f"""
             Summarize the following segment of Antimony in a clear and concise manner:
+            {item}
             """
             output = llm(
     return db
 def generate_response(db, query_text, previous_context):
     query_results = db.query(
         query_texts=query_text,
         n_results=7,
     best_recommendation = query_results['documents']
     prompt_template = f"""
+    Using the context provided below, answer the following question:
     Context:
     {previous_context} {best_recommendation}
     Question:
     {query_text}
     """
     from llama_cpp import Llama
     llm = Llama.from_pretrained(
     return full_response
 def streamlit_app():
     st.title("BioModelsRAG")
     search_str = st.text_input("Enter search query:")
     if search_str:
+        cached_data = fetch_github_json()
+        models = search_models(search_str, cached_data)
         if models:
             model_ids = list(models.keys())
                     model_data = models[model_id]
                     st.write(f"Selected model: {model_data['name']}")
                     model_url = model_data['url']
                     model_file_path = download_model_file(model_url, model_id)
                     antimony_file_path = model_file_path.replace(".xml", ".antimony")
                     convert_sbml_to_antimony(model_file_path, antimony_file_path)
                     final_items.extend(split_biomodels(antimony_file_path))
                 if final_items:
                     db = create_vector_db(final_items)
                     st.write("Models have been processed and added to the database.")
                 else:
                     st.error("No items found in the models. Check if the Antimony files were generated correctly.")
+                st.write("Models have been processed and written to the database.")
     # Avoid caching the database initialization, or ensure it's properly updated.
     @st.cache_resource
     def get_messages():