File size: 3,129 Bytes
fe60fa2
 
 
 
 
 
 
1174165
 
8373e84
 
1174165
 
 
ddfe2fc
91701f5
 
 
fe60fa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.huggingface import HuggingFaceBgeEmbeddings
import firebase_admin
from firebase_admin import credentials, storage
import json, os, dotenv
from dotenv import load_dotenv
# Resolve the Firebase service-account JSON. The value stored in the local
# .env file is preferred; when the key is absent there, fall back to
# Streamlit secrets.
load_dotenv()
_firebase_credential = dotenv.get_key(dotenv.find_dotenv(), "FIREBASE_CREDENTIAL")
if _firebase_credential is None:
    # Imported lazily so Streamlit is only needed when .env has no credential.
    import streamlit as st
    _firebase_credential = st.secrets["FIREBASE_CREDENTIAL"]
# Publish the resolved value through the process environment as well.
os.environ["FIREBASE_CREDENTIAL"] = _firebase_credential
# strict=False lets json.loads accept control characters (e.g. raw newlines)
# inside string values of the credential JSON.
cred = credentials.Certificate(json.loads(str(os.environ.get("FIREBASE_CREDENTIAL")), strict=False))
# initialize_app raises if the default app already exists, so connect to
# Firebase only when no app has been registered yet.
if not firebase_admin._apps:
    firebase_admin.initialize_app(cred, {'storageBucket': 'healthhack-store.appspot.com'})


def get_store(index_name, embeddings = None):
    """Load a FAISS store from ``index_name``, downloading it first if needed.

    Args:
        index_name: Local directory of the index. The same string is used as
            the blob prefix in the Firebase storage bucket. Trailing "/"
            characters are ignored.
        embeddings: Embedding object handed to ``FAISS.load_local``. When
            None, a CPU ``HuggingFaceBgeEmbeddings`` with normalized
            embeddings is created (no model name is passed, so the class's
            default model is used).

    Returns:
        The store returned by ``FAISS.load_local``.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of "/".
    """
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must be a non-empty path")

    ## Make sure the local directory (and every parent) exists
    os.makedirs(index_name, exist_ok=True)

    ## Check if files exist locally, get from blob
    file_names = ("index.pkl", "index.faiss")
    if not all(os.path.exists(f"{index_name}/{name}") for name in file_names):
        bucket = storage.bucket()
        # Fetch both files even when only one is missing, so the pickle and
        # the FAISS index always come from the same upload.
        for name in file_names:
            blob = bucket.blob(f"{index_name}/{name}")
            blob.download_to_filename(f"{index_name}/{name}")

    ## check embeddings, default to BGE
    if embeddings is None:
        model_kwargs = {"device": "cpu"}
        encode_kwargs = {"normalize_embeddings": True}
        embeddings = HuggingFaceBgeEmbeddings(
            model_kwargs = model_kwargs,
            encode_kwargs = encode_kwargs)

    ## load store from local
    # NOTE(security): FAISS.load_local unpickles index.pkl, so only point this
    # at a bucket whose contents are trusted.
    # NOTE(review): newer LangChain releases appear to require
    # allow_dangerous_deserialization=True here -- confirm against the pinned
    # version before upgrading.
    store = FAISS.load_local(index_name, embeddings)
    return store

def update_store_from_local(index_name):
    """Upload the local FAISS index files under ``index_name`` to the bucket.

    Args:
        index_name: Local directory holding ``index.faiss`` and
            ``index.pkl``. The same string is used as the blob prefix in the
            Firebase storage bucket. Trailing "/" characters are ignored.

    Returns:
        True once both files have been uploaded.

    Raises:
        FileNotFoundError: If the directory does not exist locally, or if
            either index file is missing from it. (A subclass of Exception,
            so existing ``except Exception`` handlers keep working.)
    """
    index_name = index_name.rstrip("/")

    ## Check if path exists locally
    if not os.path.isdir(index_name):
        raise FileNotFoundError("Index name does not exist locally")

    ## Check that both index files exist locally before touching the bucket
    file_names = ("index.faiss", "index.pkl")
    if not all(os.path.exists(f"{index_name}/{name}") for name in file_names):
        raise FileNotFoundError("Index is missing some files (index.faiss, index.pkl)")

    ## Update store
    bucket = storage.bucket()
    for name in file_names:
        blob = bucket.blob(f"{index_name}/{name}")
        blob.upload_from_filename(f"{index_name}/{name}")
    return True
    
    

# Executing this file directly as a script only prints the message below.
if __name__ == "__main__":
    print("y r u running dis")