Spaces:

Michaelj1
/

MedQA-BI

Running

Michaelj1 committed on Dec 29, 2024

Commit

55b3aef

1 Parent(s): eab4a95

download submodule

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,9 +7,11 @@ import gradio as gr
 import matplotlib.pyplot as plt
 import tempfile
 import os
 class MedicalRAG:
     def __init__(self, embed_path, pmids_path, content_path):
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         # Load data
         self.embeddings = np.load(embed_path)
@@ -18,6 +20,19 @@ class MedicalRAG:
         # Setup models
         self.encoder, self.tokenizer = self._setup_encoder()
         self.generator = self._setup_generator()
     def _create_faiss_index(self, embeddings):
         index = faiss.IndexFlatIP(768)  # 768 is embedding dimension

 import matplotlib.pyplot as plt
 import tempfile
 import os
+import subprocess
 class MedicalRAG:
     def __init__(self, embed_path, pmids_path, content_path):
+        self.download_files()
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         # Load data
         self.embeddings = np.load(embed_path)
         # Setup models
         self.encoder, self.tokenizer = self._setup_encoder()
         self.generator = self._setup_generator()
+    def download_files(self):  # fetch the three MedCPT chunk_36 files with wget unless they already exist locally
+        urls = [
+            "https://ftp.ncbi.nlm.nih.gov/pub/lu/MedCPT/pubmed_embeddings/embeds_chunk_36.npy",
+            "https://ftp.ncbi.nlm.nih.gov/pub/lu/MedCPT/pubmed_embeddings/pmids_chunk_36.json",
+            "https://ftp.ncbi.nlm.nih.gov/pub/lu/MedCPT/pubmed_embeddings/pubmed_chunk_36.json"
+        ]
+        for url in urls:
+            file_name = url.split('/')[-1]  # last URL path segment; a bare name, so it is checked relative to the current working directory
+            if not os.path.exists(file_name):
+                print(f"Downloading {file_name}...")
+                subprocess.run(["wget", url], check=True)  # requires wget on PATH; check=True raises CalledProcessError on a non-zero exit (presumably wget saves under the same file_name -- confirm)
+            else:
+                print(f"{file_name} already exists. Skipping download.")  # NOTE(review): existence check only -- a truncated file left by an interrupted download would also be skipped
     def _create_faiss_index(self, embeddings):
         index = faiss.IndexFlatIP(768)  # 768 is embedding dimension