Spaces:

cmcmaster
/

pbs_biologics_helper

Running

App Files Files Community

cmcmaster committed on Aug 2, 2024

Commit

13ba67a

verified ·

1 Parent(s): f60db99

Update main.py

Browse files

Files changed (1) hide show

main.py +60 -112

main.py CHANGED Viewed

@@ -1,11 +1,10 @@
 from fasthtml.common import *
-import sqlite3
-import os
 import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 from pbs_data import PBSPublicDataAPIClient
 import os
-from fasthtml_hf import setup_hf_backup  # Add this line
 custom_css = Style("""
     body {
@@ -83,19 +82,31 @@ custom_css = Style("""
     }
 """)
-# Database file
-DB_FILE = 'rheumatology_biologics_data.db'
-if not os.path.exists(DB_FILE):
-    print(f"Database file {DB_FILE} does not exist!")
 def load_data():
     try:
-        conn = sqlite3.connect(DB_FILE)
-        cursor = conn.cursor()
-    except sqlite3.Error as e:
-        print(f"An error occurred: {e}")
         return {
             'combinations': [],
             'drugs': [],
@@ -106,86 +117,38 @@ def load_data():
             'hospital_types': []
         }
-    # Fetch all data
-    cursor.execute('''SELECT c.pbs_code, d.name as drug, b.name as brand, f.name as formulation,
-                      i.name as indication, tp.name as treatment_phase, c.streamlined_code,
-                      c.online_application, c.authority_method, ht.name as hospital_type
-                      FROM combinations c
-                      JOIN drugs d ON c.drug_id = d.id
-                      JOIN brands b ON c.brand_id = b.id
-                      JOIN formulations f ON c.formulation_id = f.id
-                      JOIN indications i ON c.indication_id = i.id
-                      JOIN treatment_phases tp ON c.treatment_phase_id = tp.id
-                      JOIN hospital_types ht ON c.hospital_type_id = ht.id''')
-    data = cursor.fetchall()
-    # Fetch distinct values for dropdowns
-    cursor.execute('SELECT name FROM drugs')
-    drugs = [row[0] for row in cursor.fetchall()]
-    cursor.execute('SELECT name FROM brands')
-    brands = [row[0] for row in cursor.fetchall()]
-    cursor.execute('SELECT name FROM formulations')
-    formulations = [row[0] for row in cursor.fetchall()]
-    cursor.execute('SELECT name FROM indications')
-    indications = [row[0] for row in cursor.fetchall()]
-    cursor.execute('SELECT name FROM treatment_phases')
-    treatment_phases = [row[0] for row in cursor.fetchall()]
-    cursor.execute('SELECT name FROM hospital_types')
-    hospital_types = [row[0] for row in cursor.fetchall()]
-    conn.close()
-    return {
-        'combinations': data,
-        'drugs': drugs,
-        'brands': brands,
-        'formulations': formulations,
-        'indications': indications,
-        'treatment_phases': treatment_phases,
-        'hospital_types': hospital_types
-    }
 biologics_data = load_data()
 app, rt = fast_app()
 def search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type):
-    field_indices = {
-        'drug': 1,
-        'brand': 2,
-        'formulation': 3,
-        'indication': 4,
-        'treatment_phase': 5,
-        'hospital_type': 9
-    }
-    results = [combo for combo in biologics_data['combinations'] if all(
-        not value or combo[field_indices[key]] == value
-        for key, value in {'drug': drug, 'brand': brand, 'formulation': formulation, 'indication': indication, 'treatment_phase': treatment_phase, 'hospital_type': hospital_type}.items()
-    )]
-    if not results:
         return "No results found."
     output = ""
     for item in results:
         output += f"""
         <div class="result-item">
-            <h2>{item[1]} ({item[2]})</h2>
-            <p><strong>PBS Code:</strong> <a href="https://www.pbs.gov.au/medicine/item/{item[0]}" target="_blank">{item[0]}</a></p>
-            <p><strong>Formulation:</strong> {item[3]}</p>
-            <p><strong>Indication:</strong> {item[4]}</p>
-            <p><strong>Treatment Phase:</strong> {item[5]}</p>
-            <p><strong>Streamlined Code:</strong> {item[6] or 'N/A'}</p>
-            <p><strong>Authority Method:</strong> {item[8]}</p>
-            <p><strong>Online Application:</strong> {'Yes' if item[7] else 'No'}</p>
-            <p><strong>Hospital Type:</strong> {item[9]}</p>
         </div>
         <hr>
         """
@@ -193,36 +156,22 @@ def search_biologics(drug, brand, formulation, indication, treatment_phase, hosp
     return output
 def update_options(drug, brand, formulation, indication, treatment_phase, hospital_type):
-    selected = {
-        'drug': drug,
-        'brand': brand,
-        'formulation': formulation,
-        'indication': indication,
-        'treatment_phase': treatment_phase,
-        'hospital_type': hospital_type
-    }
-    field_indices = {
-        'drug': 1,
-        'brand': 2,
-        'formulation': 3,
-        'indication': 4,
-        'treatment_phase': 5,
-        'hospital_type': 9
-    }
-    filtered = [combo for combo in biologics_data['combinations'] if all(
-        not selected[key] or combo[field_indices[key]] == selected[key]
-        for key in selected
-    )]
     options = {
-        'drugs': sorted(set(combo[1] for combo in filtered)),
-        'brands': sorted(set(combo[2] for combo in filtered)),
-        'formulations': sorted(set(combo[3] for combo in filtered)),
-        'indications': sorted(set(combo[4] for combo in filtered)),
-        'treatment_phases': sorted(set(combo[5] for combo in filtered)),
-        'hospital_types': sorted(set(combo[9] for combo in filtered))
     }
     return options
@@ -352,13 +301,12 @@ def post(drug: str = '', brand: str = '', formulation: str = '', indication: str
     results = search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type)
     return results
 def update_data():
     print(f"Updating data at {datetime.datetime.now()}")
     client = PBSPublicDataAPIClient("2384af7c667342ceb5a736fe29f1dc6b", rate_limit=0.2)
     try:
         data = client.fetch_rheumatology_biologics_data()
-        client.save_data_to_sqlite(data, DB_FILE)
         print("Data updated successfully")
         global biologics_data
         biologics_data = load_data()
@@ -367,8 +315,8 @@ def update_data():
 # Set up the scheduler
 scheduler = BackgroundScheduler()
-scheduler.add_job(func=update_data, trigger='interval', minutes=10)
 scheduler.start()
-setup_hf_backup(app)  # Add this line
 serve()

 from fasthtml.common import *
+from datasets import load_dataset
 import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 from pbs_data import PBSPublicDataAPIClient
 import os
+from fasthtml_hf import setup_hf_backup
 custom_css = Style("""
     body {
     }
 """)
+DATASET_NAME = "cmcmaster/rheumatology-biologics-dataset"
 def load_data():
     try:
+        dataset = load_dataset(DATASET_NAME, split="train")
+        # Create sets for dropdown options
+        drugs = set(dataset['drug'])
+        brands = set(dataset['brand'])
+        formulations = set(dataset['formulation'])
+        indications = set(dataset['indication'])
+        treatment_phases = set(dataset['treatment_phase'])
+        hospital_types = set(dataset['hospital_type'])
+        return {
+            'combinations': dataset,
+            'drugs': sorted(drugs),
+            'brands': sorted(brands),
+            'formulations': sorted(formulations),
+            'indications': sorted(indications),
+            'treatment_phases': sorted(treatment_phases),
+            'hospital_types': sorted(hospital_types)
+        }
+    except Exception as e:
+        print(f"An error occurred while loading data: {str(e)}")
         return {
             'combinations': [],
             'drugs': [],
             'hospital_types': []
         }
 biologics_data = load_data()
 app, rt = fast_app()
 def search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type):
     """Render the combinations matching the selected filters as an HTML fragment.

     Each argument is compared for equality against the row field of the same
     name; a falsy argument (for example an empty string) leaves that field
     unfiltered.

     Returns the string "No results found." when nothing matches (or when no
     dataset is loaded), otherwise one ``result-item`` block per matching row,
     concatenated into a single string.
     """
     # Local import: the file-level import block lies outside this function.
     from html import escape

     combinations = biologics_data['combinations']
     # load_data() substitutes a plain empty list when the dataset cannot be
     # loaded; a list has no .filter(), so report "no results" instead of
     # raising AttributeError.
     if not hasattr(combinations, 'filter'):
         return "No results found."

     selected = {
         'drug': drug,
         'brand': brand,
         'formulation': formulation,
         'indication': indication,
         'treatment_phase': treatment_phase,
         'hospital_type': hospital_type,
     }
     # Only the filters the caller actually set take part in the comparison.
     active = {field: value for field, value in selected.items() if value}
     results = combinations.filter(
         lambda row: all(row[field] == value for field, value in active.items())
     )
     if len(results) == 0:
         return "No results found."

     def cell(value):
         """Return *value* as text that is safe to embed in HTML."""
         return escape(str(value))

     # Collect the fragments and join once rather than growing a string with +=.
     fragments = []
     for item in results:
         pbs_code = cell(item['pbs_code'])
         fragments.append(f"""
         <div class="result-item">
             <h2>{cell(item['drug'])} ({cell(item['brand'])})</h2>
             <p><strong>PBS Code:</strong> <a href="https://www.pbs.gov.au/medicine/item/{pbs_code}" target="_blank">{pbs_code}</a></p>
             <p><strong>Formulation:</strong> {cell(item['formulation'])}</p>
             <p><strong>Indication:</strong> {cell(item['indication'])}</p>
             <p><strong>Treatment Phase:</strong> {cell(item['treatment_phase'])}</p>
             <p><strong>Streamlined Code:</strong> {cell(item['streamlined_code'] or 'N/A')}</p>
             <p><strong>Authority Method:</strong> {cell(item['authority_method'])}</p>
             <p><strong>Online Application:</strong> {'Yes' if item['online_application'] else 'No'}</p>
             <p><strong>Hospital Type:</strong> {cell(item['hospital_type'])}</p>
             <p><strong>Schedule Year:</strong> {cell(item['schedule_year'])}</p>
             <p><strong>Schedule Month:</strong> {cell(item['schedule_month'])}</p>
         </div>
         <hr>
         """)
     return "".join(fragments)
 def update_options(drug, brand, formulation, indication, treatment_phase, hospital_type):
     """Return the dropdown choices still available under the current selection.

     The combinations are narrowed to the rows whose fields equal every
     non-falsy argument; a falsy argument (for example an empty string) leaves
     that field unfiltered.

     Returns a dict with the keys 'drugs', 'brands', 'formulations',
     'indications', 'treatment_phases' and 'hospital_types', each mapped to a
     sorted list of the distinct values found in the remaining rows.
     """
     # Option key in the returned dict -> row field it is collected from.
     option_fields = {
         'drugs': 'drug',
         'brands': 'brand',
         'formulations': 'formulation',
         'indications': 'indication',
         'treatment_phases': 'treatment_phase',
         'hospital_types': 'hospital_type',
     }

     combinations = biologics_data['combinations']
     # load_data() substitutes a plain empty list when the dataset cannot be
     # loaded; a list supports neither .filter() nor column access, so offer
     # empty option lists instead of raising.
     if not hasattr(combinations, 'filter'):
         return {option: [] for option in option_fields}

     selected = {
         'drug': drug,
         'brand': brand,
         'formulation': formulation,
         'indication': indication,
         'treatment_phase': treatment_phase,
         'hospital_type': hospital_type,
     }
     # Only the filters the caller actually set take part in the comparison.
     active = {field: value for field, value in selected.items() if value}
     filtered = combinations.filter(
         lambda row: all(row[field] == value for field, value in active.items())
     )

     options = {
         option: sorted(set(filtered[field]))
         for option, field in option_fields.items()
     }
     return options
     results = search_biologics(drug, brand, formulation, indication, treatment_phase, hospital_type)
     return results
 def update_data():
     print(f"Updating data at {datetime.datetime.now()}")
     client = PBSPublicDataAPIClient("2384af7c667342ceb5a736fe29f1dc6b", rate_limit=0.2)
     try:
         data = client.fetch_rheumatology_biologics_data()
+        client.save_data_to_hf(data, DATASET_NAME)
         print("Data updated successfully")
         global biologics_data
         biologics_data = load_data()
 # Set up the scheduler: register update_data as an interval job that fires
 # once per day (trigger='interval', days=1), then start the scheduler.
 scheduler = BackgroundScheduler()
+scheduler.add_job(func=update_data, trigger='interval', days=1)
 scheduler.start()
 # NOTE(review): setup_hf_backup comes from fasthtml_hf; presumably it wires
 # Hugging Face backup of app data into the app - confirm against that package.
+setup_hf_backup(app)
 # NOTE(review): serve() comes from fasthtml.common; presumably it starts the
 # web server for `app` - confirm.
 serve()