Yew Chong committed on
Commit fe60fa2 · 0 Parent(s)

first commit

Files changed (4)
  1. .gitignore +21 -0
  2. db_firestore.py +78 -0
  3. requirements.txt +17 -0
  4. streamlit/app.py +60 -0
.gitignore ADDED
@@ -0,0 +1,21 @@
+ # env
+ .env*
+ healthhack-store-firebase-adminsdk-sq7ne-32400d49de.json
+ /venv
+
+ # notebooks
+ *.ipynb
+ /.ipynb_checkpoints
+
+ # docs
+ /docs
+ docs.zip
+
+ # indexes
+ indexes/
+
+ # firebase
+ .firebase
+
+ # others
+ *.log
db_firestore.py ADDED
@@ -0,0 +1,78 @@
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings.huggingface import HuggingFaceBgeEmbeddings
+ import firebase_admin
+ from firebase_admin import credentials, storage
+ import json, os, dotenv
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ os.environ["FIREBASE_CREDENTIAL"] = dotenv.get_key(dotenv.find_dotenv(), "FIREBASE_CREDENTIAL")
+ cred = credentials.Certificate(json.loads(os.environ.get("FIREBASE_CREDENTIAL")))
+ firebase_admin.initialize_app(cred, {'storageBucket': 'healthhack-store.appspot.com'})  # connect to Firebase and its storage bucket
+
+
+ def get_store(index_name, embeddings=None):
+     while index_name[-1] == "/":
+         index_name = index_name[:-1]
+     dir = index_name.split("/")
+
+     ## Create the local directory path if it does not exist yet
+     for i in range(len(dir)):
+         path = '/'.join(dir[:i+1])
+         if not os.path.exists(path):
+             os.mkdir(path)
+
+     ## If the index files are missing locally, download them from the storage bucket
+     if (not os.path.exists(index_name + "/index.faiss") or
+         not os.path.exists(index_name + "/index.pkl")
+     ):
+         bucket = storage.bucket()
+         blob = bucket.blob(f"{index_name}/index.pkl")
+         blob.download_to_filename(f"{index_name}/index.pkl")
+         bucket = storage.bucket()
+         blob = bucket.blob(f"{index_name}/index.faiss")
+         blob.download_to_filename(f"{index_name}/index.faiss")
+
+     ## If no embeddings are supplied, default to BGE (model_name is left commented out, so the library's default BGE model is used)
+     if embeddings is None:
+         model_name = "bge-large-en-v1.5"
+         model_kwargs = {"device": "cpu"}
+         encode_kwargs = {"normalize_embeddings": True}
+         embeddings = HuggingFaceBgeEmbeddings(
+             # model_name=model_name,
+             model_kwargs=model_kwargs,
+             encode_kwargs=encode_kwargs)
+
+     ## Load the store from the local copy
+     store = FAISS.load_local(index_name, embeddings)
+     return store
+
+ def update_store_from_local(index_name):
+     while index_name[-1] == "/":
+         index_name = index_name[:-1]
+     pathdir = index_name.split("/")
+
+     ## Check that the path exists locally
+     for i in range(len(pathdir)):
+         path = '/'.join(pathdir[:i+1])
+         if not os.path.exists(path):
+             raise Exception("Index name does not exist locally")
+
+     ## Check that both index files exist locally
+     if (not os.path.exists(index_name + "/index.faiss") or
+         not os.path.exists(index_name + "/index.pkl")
+     ):
+         raise Exception("Index is missing some files (index.faiss, index.pkl)")
+
+     ## Upload the local index files to the storage bucket
+     bucket = storage.bucket()
+     blob = bucket.blob(index_name + "/index.faiss")
+     blob.upload_from_filename(index_name + "/index.faiss")
+     blob = bucket.blob(index_name + "/index.pkl")
+     blob.upload_from_filename(index_name + "/index.pkl")
+     return True
+
+
+
+ if __name__ == "__main__":
+     print("y r u running dis")
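
A minimal usage sketch of these helpers, assuming FIREBASE_CREDENTIAL is set in .env and that an index already exists in the bucket (the index path "indexes/SCLC" and the query text are illustrative, not part of this commit):

    from db_firestore import get_store, update_store_from_local

    # Downloads index.faiss / index.pkl from the bucket if they are not cached locally,
    # then loads the FAISS store with the default BGE embeddings.
    store = get_store("indexes/SCLC")                      # hypothetical index path
    docs = store.similarity_search("example query", k=2)   # query the loaded store

    # After rebuilding the index locally, push both files back to the bucket.
    update_store_from_local("indexes/SCLC")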
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ transformers
+ langchain==0.0.352
+ langchain-openai
+ sentence_transformers
+ python-dotenv
+ numexpr
+ ipywidgets
+ ipykernel
+ pypdf
+ aspose-words
+ tiktoken
+ faiss-cpu
+ streamlit
+ firebase-admin
+
+ --index-url https://download.pytorch.org/whl/cu113
+ torch==2.1.2
streamlit/app.py ADDED
@@ -0,0 +1,60 @@
+ import streamlit as st
+ from langchain_community.document_loaders import TextLoader
+ from langchain_openai import AzureOpenAIEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain.docstore.document import Document
+
+ import openai
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ import tiktoken
+
+ import os
+
+ from dotenv import load_dotenv
+ load_dotenv()
+ if not os.environ.get("OPENAI_API_KEY"):
+     raise Exception("No OpenAI Key detected")
+
+ embeddings = OpenAIEmbeddings(deployment="textembedding", chunk_size=16, api_key=os.environ["OPENAI_API_KEY"])
+ index_name = "SCLC"
+ store = FAISS.load_local(index_name, embeddings)
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+
+ from langchain.prompts.few_shot import FewShotPromptTemplate
+ from langchain.prompts.prompt import PromptTemplate
+ from operator import itemgetter
+ from langchain.schema import StrOutputParser
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.runnables import RunnablePassthrough
+
+ TEMPLATE = """You are a chatbot.
+ Here is the context:
+ {context}
+ ----------------------------------------------------------------
+ You are to reply to the following question, with reference to the above context.
+ Question:
+ {question}
+ ----------------------------------------------------------------
+ Your reply:
+ """
+
+ prompt = PromptTemplate(
+     input_variables=["question", "context"],
+     template=TEMPLATE
+ )
+ retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": 2})
+ def format_docs(docs):
+     return "\n--------------------\n".join(doc.page_content for doc in docs)
+
+ chain = ({"context": retriever | format_docs, "question": RunnablePassthrough()} |
+          prompt |
+          llm |
+          StrOutputParser()
+          )
+
+
+ st.title("test")
+
+ t = st.text_input("Input")
+ st.write(chain.invoke(t))
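
The app would be launched with "streamlit run streamlit/app.py". As a rough sketch of what the chain does for a single question outside Streamlit, reusing the objects defined above (the question text is illustrative):

    question = "What is the recommended follow-up schedule?"  # illustrative question
    docs = retriever.get_relevant_documents(question)         # top-2 chunks from the FAISS store
    context = format_docs(docs)                               # chunks joined by separator lines
    print(prompt.format(question=question, context=context))  # the prompt the chain fills in
    print(chain.invoke(question))                             # the model's reply as a plain string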