from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.huggingface import HuggingFaceBgeEmbeddings
import firebase_admin
from firebase_admin import credentials, storage
import json, os, dotenv
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
## Load the Firebase service-account credential from a local .env file if present,
## otherwise fall back to Streamlit secrets (e.g. when running as a deployed Space).
try:
    os.environ["FIREBASE_CREDENTIAL"] = dotenv.get_key(dotenv.find_dotenv(), "FIREBASE_CREDENTIAL")
    if os.environ.get("FIREBASE_CREDENTIAL") is None:
        raise TypeError
except TypeError:
    import streamlit as st
    os.environ["FIREBASE_CREDENTIAL"] = st.secrets["FIREBASE_CREDENTIAL"]

cred = credentials.Certificate(json.loads(str(os.environ.get("FIREBASE_CREDENTIAL")), strict=False))
if not firebase_admin._apps:
    firebase_admin.initialize_app(cred, {'storageBucket': 'healthhack-store.appspot.com'})
def get_csv(index_name, isDiseases=True):
    while index_name[-1] == "/":
        index_name = index_name[:-1]
    pathdir = index_name.split("/")
    ## Check if path exists locally
    for i in range(len(pathdir)):
        path = '/'.join(pathdir[:i+1])
        if not os.path.exists(path):
            os.mkdir(path)
    ## Check if file exists locally, get from blob
    if (not os.path.exists(index_name+"/diseases.csv") or
        not os.path.exists(index_name+"/network.csv")
        ):
        bucket = storage.bucket()
        blob = bucket.blob(f"{index_name}/diseases.csv")
        blob.download_to_filename(f"{index_name}/diseases.csv")
        bucket = storage.bucket()
        blob = bucket.blob(f"{index_name}/network.csv")
        blob.download_to_filename(f"{index_name}/network.csv")
    ## load store from local
    if isDiseases:
        store = pd.read_csv(f"{index_name}/diseases.csv")
        return store
    else:
        store = pd.read_csv(f"{index_name}/network.csv")
        return store
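## Example usage (sketch only; "indexes/healthhack" is a hypothetical index name,
## assuming the bucket configured above holds "<index>/diseases.csv" and "<index>/network.csv"):
# diseases_df = get_csv("indexes/healthhack")                    # diseases.csv as a DataFrame
# network_df = get_csv("indexes/healthhack", isDiseases=False)   # network.csv as a DataFrame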
def get_store(index_name, embeddings=None):
    while index_name[-1] == "/":
        index_name = index_name[:-1]
    pathdir = index_name.split("/")
    ## Check if path exists locally
    for i in range(len(pathdir)):
        path = '/'.join(pathdir[:i+1])
        if not os.path.exists(path):
            os.mkdir(path)
    ## Check if file exists locally, get from blob
    if (not os.path.exists(index_name+"/index.faiss") or
        not os.path.exists(index_name+"/index.pkl")
        ):
        bucket = storage.bucket()
        blob = bucket.blob(f"{index_name}/index.pkl")
        blob.download_to_filename(f"{index_name}/index.pkl")
        bucket = storage.bucket()
        blob = bucket.blob(f"{index_name}/index.faiss")
        blob.download_to_filename(f"{index_name}/index.faiss")
    ## check embeddings, default to BGE
    if embeddings is None:
        model_name = "bge-large-en-v1.5"
        model_kwargs = {"device": "cpu"}
        encode_kwargs = {"normalize_embeddings": True}
        embeddings = HuggingFaceBgeEmbeddings(
            # model_name=model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs)
    ## load store from local
    store = FAISS.load_local(index_name, embeddings)
    return store
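## Example usage (sketch only; the index name is hypothetical, and similarity_search
## is the standard LangChain FAISS query method rather than anything project-specific):
# store = get_store("indexes/healthhack")
# docs = store.similarity_search("chronic kidney disease", k=3)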
def update_csv_from_local(index_name):
    while index_name[-1] == "/":
        index_name = index_name[:-1]
    pathdir = index_name.split("/")
    ## Check if path exists locally
    for i in range(len(pathdir)):
        path = '/'.join(pathdir[:i+1])
        if not os.path.exists(path):
            raise Exception("Index name does not exist locally")
    if (not os.path.exists(index_name+"/diseases.csv") or
        not os.path.exists(index_name+"/network.csv")
        ):
        raise Exception("Index is missing some files (network.csv, diseases.csv)")
    ## Update store
    bucket = storage.bucket()
    blob = bucket.blob(index_name+"/diseases.csv")
    blob.upload_from_filename(index_name+"/diseases.csv")
    blob = bucket.blob(index_name+"/network.csv")
    blob.upload_from_filename(index_name+"/network.csv")
    return True
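## Example usage (sketch only; assumes both CSVs were edited locally first):
# update_csv_from_local("indexes/healthhack")  # uploads diseases.csv and network.csv to the bucket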
def update_store_from_local(index_name):
    while index_name[-1] == "/":
        index_name = index_name[:-1]
    pathdir = index_name.split("/")
    ## Check if path exists locally
    for i in range(len(pathdir)):
        path = '/'.join(pathdir[:i+1])
        if not os.path.exists(path):
            raise Exception("Index name does not exist locally")
    ## Check that the index files exist locally
    if (not os.path.exists(index_name+"/index.faiss") or
        not os.path.exists(index_name+"/index.pkl")
        ):
        raise Exception("Index is missing some files (index.faiss, index.pkl)")
    ## Update store
    bucket = storage.bucket()
    blob = bucket.blob(index_name+"/index.faiss")
    blob.upload_from_filename(index_name+"/index.faiss")
    blob = bucket.blob(index_name+"/index.pkl")
    blob.upload_from_filename(index_name+"/index.pkl")
    return True
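## Example usage (sketch only; assumes a FAISS index was saved locally beforehand,
## e.g. via FAISS.save_local, producing index.faiss and index.pkl under the index folder):
# update_store_from_local("indexes/healthhack")  # uploads index.faiss and index.pkl to the bucket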
if __name__ == "__main__":
    print("y r u running dis")