Spaces:
Runtime error
Runtime error
Yew Chong
committed on
Commit
•
fe60fa2
0
Parent(s):
first commit
Browse files- .gitignore +21 -0
- db_firestore.py +78 -0
- requirements.txt +17 -0
- streamlit/app.py +60 -0
.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# env
|
2 |
+
.env*
|
3 |
+
healthhack-store-firebase-adminsdk-sq7ne-32400d49de.json
|
4 |
+
/venv
|
5 |
+
|
6 |
+
# notebooks
|
7 |
+
*.ipynb
|
8 |
+
/.ipynb_checkpoints
|
9 |
+
|
10 |
+
# docs
|
11 |
+
/docs
|
12 |
+
docs.zip
|
13 |
+
|
14 |
+
# indexes
|
15 |
+
indexes/
|
16 |
+
|
17 |
+
# firebase
|
18 |
+
.firebase
|
19 |
+
|
20 |
+
# others
|
21 |
+
*.log
|
db_firestore.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.vectorstores import FAISS
|
2 |
+
from langchain_community.embeddings.huggingface import HuggingFaceBgeEmbeddings
|
3 |
+
import firebase_admin
|
4 |
+
from firebase_admin import credentials, storage
|
5 |
+
import json, os, dotenv
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
# Load variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# Read the service-account JSON from the process environment rather than
# straight from the .env file: the variable may be supplied by the host
# without any .env file present, in which case a direct file lookup yields
# None and assigning None into os.environ raises TypeError at import time.
# NOTE(review): with load_dotenv()'s defaults an already-set environment
# variable takes precedence over the .env value - confirm that is acceptable.
firebase_credential = os.environ.get("FIREBASE_CREDENTIAL")
if not firebase_credential:
    raise RuntimeError(
        "FIREBASE_CREDENTIAL is not set; expected the Firebase service-account JSON"
    )

# The variable holds the service-account key as a JSON document.
cred = credentials.Certificate(json.loads(firebase_credential))
firebase_admin.initialize_app(cred, {'storageBucket': 'healthhack-store.appspot.com'})  # connecting to firebase
|
12 |
+
|
13 |
+
|
14 |
+
def get_store(index_name, embeddings = None):
    """Load the FAISS store stored under ``index_name``.

    If either ``index.faiss`` or ``index.pkl`` is missing locally, both files
    are downloaded from the default Firebase storage bucket first.

    Args:
        index_name: Slash-separated path of the index directory. It is used
            both as the local directory and as the blob prefix in the bucket.
            Trailing slashes are ignored.
        embeddings: Embedding object passed to ``FAISS.load_local``. When
            ``None``, a CPU ``HuggingFaceBgeEmbeddings`` with normalized
            embeddings is created.

    Returns:
        The store returned by ``FAISS.load_local``.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of slashes.
    """
    # rstrip instead of indexing [-1] in a loop, which raised IndexError on
    # an empty or all-slash name.
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must not be empty")

    ## Make sure the local directory (including parents) exists
    # makedirs also copes with absolute paths, whose first "/"-separated
    # component is the empty string.
    os.makedirs(index_name, exist_ok=True)

    ## Check if files exist locally, otherwise get both from blob
    index_files = ("index.pkl", "index.faiss")
    if not all(os.path.exists(f"{index_name}/{name}") for name in index_files):
        bucket = storage.bucket()
        for name in index_files:
            blob = bucket.blob(f"{index_name}/{name}")
            blob.download_to_filename(f"{index_name}/{name}")

    ## check embeddings, default to BGE
    if embeddings is None:
        # model_name is deliberately left unset, so the library's default
        # BGE model is used.
        # NOTE(review): confirm the stored index was built with that model.
        embeddings = HuggingFaceBgeEmbeddings(
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )

    ## load store from local
    # NOTE(review): index.pkl is presumably unpickled by load_local - only
    # load indexes from a bucket you trust.
    return FAISS.load_local(index_name, embeddings)
|
49 |
+
|
50 |
+
def update_store_from_local(index_name):
    """Upload the local FAISS index files of ``index_name`` to Firebase storage.

    Args:
        index_name: Slash-separated path of the local index directory. The
            same path is used as the blob prefix in the default bucket.
            Trailing slashes are ignored.

    Returns:
        ``True`` once ``index.faiss`` and ``index.pkl`` have been uploaded.

    Raises:
        ValueError: If ``index_name`` is empty or consists only of slashes.
        FileNotFoundError: If the directory does not exist locally, or if it
            lacks ``index.faiss`` or ``index.pkl``.
    """
    # rstrip instead of indexing [-1] in a loop, which raised IndexError on
    # an empty or all-slash name.
    index_name = index_name.rstrip("/")
    if not index_name:
        raise ValueError("index_name must not be empty")

    ## Check if path exists locally
    # One check on the full path is enough: if it exists, so do its parents.
    if not os.path.isdir(index_name):
        raise FileNotFoundError("Index name does not exist locally")

    ## Check that both index files exist locally before uploading
    index_files = ("index.faiss", "index.pkl")
    if not all(os.path.exists(f"{index_name}/{name}") for name in index_files):
        # Must raise an exception instance; raising a bare string is a TypeError.
        raise FileNotFoundError("Index is missing some files (index.faiss, index.pkl)")

    ## Update store
    bucket = storage.bucket()
    for name in index_files:
        blob = bucket.blob(f"{index_name}/{name}")
        blob.upload_from_filename(f"{index_name}/{name}")
    return True
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
if __name__ == "__main__":
    # This module is meant to be imported; running it directly only prints
    # a reminder.
    print("y r u running dis")
|
requirements.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
langchain==0.0.352
|
3 |
+
langchain-openai
|
4 |
+
sentence_transformers
|
5 |
+
python-dotenv
|
6 |
+
numexpr
|
7 |
+
ipywidgets
|
8 |
+
ipykernel
|
9 |
+
pypdf
|
10 |
+
aspose-words
|
11 |
+
tiktoken
|
12 |
+
faiss-cpu
|
13 |
+
streamlit
|
14 |
+
firebase-admin
|
15 |
+
|
16 |
+
--index-url https://download.pytorch.org/whl/cu113
|
17 |
+
torch==2.1.2
|
streamlit/app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain_community.document_loaders import TextLoader
|
3 |
+
from langchain_openai import AzureOpenAIEmbeddings
|
4 |
+
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
5 |
+
from langchain_community.vectorstores import FAISS
|
6 |
+
from langchain.docstore.document import Document
|
7 |
+
|
8 |
+
import openai
|
9 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
10 |
+
import tiktoken
|
11 |
+
|
12 |
+
import os
|
13 |
+
|
14 |
+
from dotenv import load_dotenv
|
15 |
+
# Pull variables from a local .env file (if any) into the environment.
load_dotenv()

# Fail fast when no OpenAI key is configured.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise Exception("No OpenAI Key detected")

# Embeddings used to load and query the FAISS index.
embeddings = OpenAIEmbeddings(
    deployment="textembedding",
    chunk_size=16,
    api_key=openai_api_key,
)

# The index is read from the local directory named by index_name.
index_name = "SCLC"
store = FAISS.load_local(index_name, embeddings)

# Chat model that produces the final answer; temperature is set to 0.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
|
23 |
+
|
24 |
+
from langchain.prompts.few_shot import FewShotPromptTemplate
|
25 |
+
from langchain.prompts.prompt import PromptTemplate
|
26 |
+
from operator import itemgetter
|
27 |
+
from langchain.schema import StrOutputParser
|
28 |
+
from langchain_core.output_parsers import StrOutputParser
|
29 |
+
from langchain_core.runnables import RunnablePassthrough
|
30 |
+
|
31 |
+
# Prompt text: the retrieved context goes into {context} and the user's
# input into {question}.
TEMPLATE = """You are a chatbot.
Here is the context:
{context}
----------------------------------------------------------------
You are to reply the following question, with reference to the above context.
Question:
{question}
----------------------------------------------------------------
Your reply:
"""

prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["question", "context"],
)

# Similarity search over the store, requesting k=2 documents per query.
retriever = store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)
|
47 |
+
def format_docs(docs):
    """Join the ``page_content`` of each document in ``docs``, separated by a dashed line."""
    separator = "\n--------------------\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)
|
49 |
+
|
50 |
+
# Pipeline: the input string is sent to the retriever (whose documents are
# joined by format_docs) and also passed through unchanged as the question;
# both fill the prompt, which goes to the LLM, and the reply is parsed to
# a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


st.title("test")

t = st.text_input("Input")
# The text box is empty until the user submits something; skip the chain
# in that case so no retrieval or LLM call is made for a blank question.
if t and t.strip():
    st.write(chain.invoke(t))
|