Spaces:
Build error
Build error
Reformatted code; updated linting
Browse files- app.py +4 -47
- chatbot/__init__.py +0 -0
- chatbot/utils.py +50 -0
- setup.cfg +2 -1
- shell/format.sh +3 -3
- shell/lint.sh +3 -3
app.py
CHANGED
@@ -1,14 +1,8 @@
|
|
1 |
import json
|
2 |
-
import os
|
3 |
|
4 |
import streamlit as st
|
5 |
-
from gdown import download_folder
|
6 |
-
from llama_index import ServiceContext
|
7 |
-
from llama_index import SimpleDirectoryReader
|
8 |
-
from llama_index import VectorStoreIndex
|
9 |
-
from llama_index import set_global_service_context
|
10 |
-
from llama_index.embeddings import OpenAIEmbedding
|
11 |
-
from llama_index.llms import AzureOpenAI
|
12 |
|
13 |
# Initialize message history
|
14 |
st.header("Chat with André's research 💬 📚")
|
@@ -21,47 +15,10 @@ with open(r"config.json") as config_file:
|
|
21 |
config_details = json.load(config_file)
|
22 |
|
23 |
|
24 |
-
def download_test_data():
|
25 |
-
url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
|
26 |
-
with st.spinner(text="Downloading test data. Might take a few seconds."):
|
27 |
-
download_folder(url, quiet=True, use_cookies=False, output="./data/")
|
28 |
-
|
29 |
-
|
30 |
-
@st.cache_resource(show_spinner=False)
|
31 |
-
def load_data():
|
32 |
-
with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
|
33 |
-
documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
|
34 |
-
llm = AzureOpenAI(
|
35 |
-
model="gpt-3.5-turbo",
|
36 |
-
engine=config_details["ENGINE"],
|
37 |
-
temperature=0.5,
|
38 |
-
api_key=os.getenv("OPENAI_API_KEY"),
|
39 |
-
api_base=config_details["OPENAI_API_BASE"],
|
40 |
-
api_type="azure",
|
41 |
-
api_version=config_details["OPENAI_API_VERSION"],
|
42 |
-
system_prompt="You are an expert on André's research and your job is to answer"
|
43 |
-
"technical questions. Assume that all questions are related to"
|
44 |
-
"André's research. Keep your answers technical and based on facts"
|
45 |
-
" – do not hallucinate features.",
|
46 |
-
)
|
47 |
-
# You need to deploy your own embedding model as well as your own chat completion model
|
48 |
-
embed_model = OpenAIEmbedding(
|
49 |
-
model="text-embedding-ada-002",
|
50 |
-
deployment_name=config_details["ENGINE_EMBEDDING"],
|
51 |
-
api_key=os.getenv("OPENAI_API_KEY"),
|
52 |
-
api_base=config_details["OPENAI_API_BASE"],
|
53 |
-
api_type="azure",
|
54 |
-
api_version=config_details["OPENAI_API_VERSION"],
|
55 |
-
)
|
56 |
-
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
57 |
-
set_global_service_context(service_context)
|
58 |
-
index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)
|
59 |
-
return index
|
60 |
-
|
61 |
-
|
62 |
def main():
|
|
|
63 |
download_test_data()
|
64 |
-
index = load_data()
|
65 |
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
|
66 |
|
67 |
if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history
|
|
|
1 |
import json
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
+
from src.utils import download_test_data
|
5 |
+
from src.utils import load_data
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Initialize message history
|
8 |
st.header("Chat with André's research 💬 📚")
|
|
|
15 |
config_details = json.load(config_file)
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def main():
|
19 |
+
# setup dataset
|
20 |
download_test_data()
|
21 |
+
index = load_data(config_details)
|
22 |
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
|
23 |
|
24 |
if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history
|
chatbot/__init__.py
ADDED
File without changes
|
chatbot/utils.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from gdown import download_folder
|
5 |
+
from llama_index import ServiceContext
|
6 |
+
from llama_index import SimpleDirectoryReader
|
7 |
+
from llama_index import VectorStoreIndex
|
8 |
+
from llama_index import set_global_service_context
|
9 |
+
from llama_index.embeddings import OpenAIEmbedding
|
10 |
+
from llama_index.llms import AzureOpenAI
|
11 |
+
|
12 |
+
|
13 |
+
@st.cache_resource(show_spinner=False)
|
14 |
+
def download_test_data():
|
15 |
+
# url = f"https://drive.google.com/drive/folders/uc?export=download&confirm=pbef&id={file_id}"
|
16 |
+
url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
|
17 |
+
with st.spinner(text="Downloading test data. Might take a few seconds."):
|
18 |
+
download_folder(url=url, quiet=False, use_cookies=False, output="./data/")
|
19 |
+
|
20 |
+
|
21 |
+
@st.cache_resource(show_spinner=False)
|
22 |
+
def load_data(config_details):
|
23 |
+
with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
|
24 |
+
documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
|
25 |
+
llm = AzureOpenAI(
|
26 |
+
model="gpt-3.5-turbo",
|
27 |
+
engine=config_details["ENGINE"],
|
28 |
+
temperature=0.5,
|
29 |
+
api_key=os.getenv("OPENAI_API_KEY"),
|
30 |
+
api_base=config_details["OPENAI_API_BASE"],
|
31 |
+
api_type="azure",
|
32 |
+
api_version=config_details["OPENAI_API_VERSION"],
|
33 |
+
system_prompt="You are an expert on André's research and your job is to answer"
|
34 |
+
"technical questions. Assume that all questions are related to"
|
35 |
+
"André's research. Keep your answers technical and based on facts"
|
36 |
+
" – do not hallucinate features.",
|
37 |
+
)
|
38 |
+
# You need to deploy your own embedding model as well as your own chat completion model
|
39 |
+
embed_model = OpenAIEmbedding(
|
40 |
+
model="text-embedding-ada-002",
|
41 |
+
deployment_name=config_details["ENGINE_EMBEDDING"],
|
42 |
+
api_key=os.getenv("OPENAI_API_KEY"),
|
43 |
+
api_base=config_details["OPENAI_API_BASE"],
|
44 |
+
api_type="azure",
|
45 |
+
api_version=config_details["OPENAI_API_VERSION"],
|
46 |
+
)
|
47 |
+
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
48 |
+
set_global_service_context(service_context)
|
49 |
+
index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)
|
50 |
+
return index
|
setup.cfg
CHANGED
@@ -3,7 +3,7 @@ description-file = README.md
|
|
3 |
|
4 |
[isort]
|
5 |
force_single_line=True
|
6 |
-
known_first_party=
|
7 |
line_length=120
|
8 |
profile=black
|
9 |
|
@@ -12,3 +12,4 @@ profile=black
|
|
12 |
per-file-ignores=*__init__.py:F401
|
13 |
ignore=E203,W503,W605,F632,E266,E731,E712,E741
|
14 |
max-line-length=120
|
|
|
|
3 |
|
4 |
[isort]
|
5 |
force_single_line=True
|
6 |
+
known_first_party=chatbot
|
7 |
line_length=120
|
8 |
profile=black
|
9 |
|
|
|
12 |
per-file-ignores=*__init__.py:F401
|
13 |
ignore=E203,W503,W605,F632,E266,E731,E712,E741
|
14 |
max-line-length=120
|
15 |
+
exclude=venv/
|
shell/format.sh
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
#!/bin/bash
|
2 |
-
isort --sl
|
3 |
-
black --line-length 120
|
4 |
-
flake8
|
|
|
1 |
#!/bin/bash
|
2 |
+
isort --sl .
|
3 |
+
black --line-length 120 .
|
4 |
+
flake8 .
|
shell/lint.sh
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
#!/bin/bash
|
2 |
-
isort --check --sl -c
|
3 |
if ! [ $? -eq 0 ]
|
4 |
then
|
5 |
echo "Please run \"sh shell/format.sh\" to format the code."
|
6 |
exit 1
|
7 |
fi
|
8 |
echo "no issues with isort"
|
9 |
-
flake8
|
10 |
if ! [ $? -eq 0 ]
|
11 |
then
|
12 |
echo "Please fix the code style issue."
|
13 |
exit 1
|
14 |
fi
|
15 |
echo "no issues with flake8"
|
16 |
-
black --check --line-length 120
|
17 |
if ! [ $? -eq 0 ]
|
18 |
then
|
19 |
echo "Please run \"sh shell/format.sh\" to format the code."
|
|
|
1 |
#!/bin/bash
|
2 |
+
isort --check --sl -c .
|
3 |
if ! [ $? -eq 0 ]
|
4 |
then
|
5 |
echo "Please run \"sh shell/format.sh\" to format the code."
|
6 |
exit 1
|
7 |
fi
|
8 |
echo "no issues with isort"
|
9 |
+
flake8 .
|
10 |
if ! [ $? -eq 0 ]
|
11 |
then
|
12 |
echo "Please fix the code style issue."
|
13 |
exit 1
|
14 |
fi
|
15 |
echo "no issues with flake8"
|
16 |
+
black --check --line-length 120 .
|
17 |
if ! [ $? -eq 0 ]
|
18 |
then
|
19 |
echo "Please run \"sh shell/format.sh\" to format the code."
|