File size: 4,894 Bytes
fe60fa2
 
 
 
 
fe569a7
fe60fa2
 
1174165
 
8373e84
 
1174165
 
 
ddfe2fc
fe569a7
91701f5
 
fe60fa2
fe569a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe60fa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe569a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe60fa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.huggingface import HuggingFaceBgeEmbeddings
import firebase_admin
from firebase_admin import credentials, storage
import json, os, dotenv
import pandas as pd
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into the process
# environment before resolving the Firebase credential.
load_dotenv()

# Resolve the Firebase service-account JSON string.
# Preferred source: the FIREBASE_CREDENTIAL entry of the .env file.
# Fallback: Streamlit secrets, used when the .env lookup yields nothing
# (python-dotenv's get_key returns None when the key or file is missing).
_firebase_credential = dotenv.get_key(dotenv.find_dotenv(), "FIREBASE_CREDENTIAL")
if not _firebase_credential:
    # Imported lazily so Streamlit is only needed when the fallback is used.
    import streamlit as st
    _firebase_credential = st.secrets["FIREBASE_CREDENTIAL"]

# Publish the resolved value through the environment, as the original code did.
os.environ["FIREBASE_CREDENTIAL"] = _firebase_credential

# strict=False lets the JSON decoder accept control characters (e.g. raw
# newlines) inside string values of the credential document.
cred = credentials.Certificate(json.loads(str(os.environ.get("FIREBASE_CREDENTIAL")), strict=False))

# Only initialise the default Firebase app when none is registered yet.
# NOTE(review): presumably guards against re-initialisation when this module
# is executed more than once in the same process — confirm.
if not firebase_admin._apps:
    firebase_admin.initialize_app(cred, {'storageBucket': 'healthhack-store.appspot.com'})

def get_csv(index_name, isDiseases=True):
    """Return one of an index's CSV tables as a pandas DataFrame.

    The local directory ``index_name`` is created if needed. When either
    ``diseases.csv`` or ``network.csv`` is missing from it, both files are
    downloaded from the default Firebase Storage bucket, using
    ``<index_name>/<file>`` as the blob name.

    Args:
        index_name: Local directory of the index; trailing slashes are
            ignored. Relative and absolute paths are both accepted.
        isDiseases: When true, load ``diseases.csv``; otherwise load
            ``network.csv``.

    Returns:
        The selected CSV file parsed with ``pandas.read_csv``.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of slashes.
    """
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must be a non-empty path")

    # makedirs creates every missing parent in one call and, unlike a manual
    # per-component mkdir loop, also copes with absolute paths (whose first
    # "/"-separated component is the empty string).
    os.makedirs(index_name, exist_ok=True)

    diseases_path = f"{index_name}/diseases.csv"
    network_path = f"{index_name}/network.csv"

    # Both files are refreshed together if either one is missing locally.
    if not (os.path.exists(diseases_path) and os.path.exists(network_path)):
        bucket = storage.bucket()
        for path in (diseases_path, network_path):
            bucket.blob(path).download_to_filename(path)

    return pd.read_csv(diseases_path if isDiseases else network_path)

def get_store(index_name, embeddings=None):
    """Load a FAISS vector store from ``index_name``, fetching it if needed.

    The local directory ``index_name`` is created if needed. When either
    ``index.faiss`` or ``index.pkl`` is missing from it, both files are
    downloaded from the default Firebase Storage bucket, using
    ``<index_name>/<file>`` as the blob name.

    Args:
        index_name: Local directory of the index; trailing slashes are
            ignored. Relative and absolute paths are both accepted.
        embeddings: Embedding object handed to ``FAISS.load_local``. When
            ``None``, a CPU ``HuggingFaceBgeEmbeddings`` with normalised
            embeddings is created.

    Returns:
        The store returned by ``FAISS.load_local(index_name, embeddings)``.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of slashes.
    """
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must be a non-empty path")

    # makedirs creates every missing parent in one call and, unlike a manual
    # per-component mkdir loop, also copes with absolute paths (whose first
    # "/"-separated component is the empty string).
    os.makedirs(index_name, exist_ok=True)

    faiss_path = f"{index_name}/index.faiss"
    pkl_path = f"{index_name}/index.pkl"

    # Both files are refreshed together if either one is missing locally.
    if not (os.path.exists(faiss_path) and os.path.exists(pkl_path)):
        bucket = storage.bucket()
        for path in (pkl_path, faiss_path):
            bucket.blob(path).download_to_filename(path)

    if embeddings is None:
        # No model_name is passed, so the embedding class falls back to its
        # own default model.
        # NOTE(review): an earlier revision had a disabled
        # model_name="bge-large-en-v1.5" argument — confirm which model the
        # stored index was actually built with.
        embeddings = HuggingFaceBgeEmbeddings(
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )

    # NOTE(review): index.pkl is a pickle file; loading it can execute
    # arbitrary code, so the bucket contents must be trusted. Newer LangChain
    # releases may also require allow_dangerous_deserialization=True here —
    # confirm against the pinned version.
    return FAISS.load_local(index_name, embeddings)

def update_csv_from_local(index_name):
    """Upload the local ``diseases.csv`` and ``network.csv`` of an index.

    Both files are uploaded to the default Firebase Storage bucket, using
    ``<index_name>/<file>`` as the blob name.

    Args:
        index_name: Local directory of the index; trailing slashes are
            ignored. Relative and absolute paths are both accepted.

    Returns:
        ``True`` once both uploads have completed.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of slashes.
        FileNotFoundError: If the directory does not exist locally, or if
            either CSV file is missing from it. (A subclass of ``Exception``,
            so existing ``except Exception`` handlers keep working.)
    """
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must be a non-empty path")

    # Checking the full path is enough: it can only exist if every parent
    # does, and it avoids testing the empty first component of absolute paths.
    if not os.path.isdir(index_name):
        raise FileNotFoundError("Index name does not exist locally")

    diseases_path = index_name + "/diseases.csv"
    network_path = index_name + "/network.csv"
    if not (os.path.exists(diseases_path) and os.path.exists(network_path)):
        # Must raise an exception instance; raising a bare string is itself
        # a TypeError and would hide this message.
        raise FileNotFoundError("Index is missing some files (network.csv, diseases.csv)")

    ## Update store
    bucket = storage.bucket()
    for path in (diseases_path, network_path):
        bucket.blob(path).upload_from_filename(path)
    return True

def update_store_from_local(index_name):
    """Upload the local ``index.faiss`` and ``index.pkl`` of an index.

    Both files are uploaded to the default Firebase Storage bucket, using
    ``<index_name>/<file>`` as the blob name.

    Args:
        index_name: Local directory of the index; trailing slashes are
            ignored. Relative and absolute paths are both accepted.

    Returns:
        ``True`` once both uploads have completed.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of slashes.
        FileNotFoundError: If the directory does not exist locally, or if
            either index file is missing from it. (A subclass of
            ``Exception``, so existing ``except Exception`` handlers keep
            working.)
    """
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must be a non-empty path")

    # Checking the full path is enough: it can only exist if every parent
    # does, and it avoids testing the empty first component of absolute paths.
    if not os.path.isdir(index_name):
        raise FileNotFoundError("Index name does not exist locally")

    faiss_path = index_name + "/index.faiss"
    pkl_path = index_name + "/index.pkl"
    if not (os.path.exists(faiss_path) and os.path.exists(pkl_path)):
        # Must raise an exception instance; raising a bare string is itself
        # a TypeError and would hide this message.
        raise FileNotFoundError("Index is missing some files (index.faiss, index.pkl)")

    ## Update store
    bucket = storage.bucket()
    for path in (faiss_path, pkl_path):
        bucket.blob(path).upload_from_filename(path)
    return True
    
    

def _main():
    """Print a notice; this module has no work to do when run as a script."""
    print("y r u running dis")


if __name__ == "__main__":
    _main()