Commit 19cf5e6 · 1 parent: 936d627
Freemind demo
This view is limited to 50 files because it contains too many changes.
- .gitignore +251 -0
- models/freemind/embeddings/freemind.csv +0 -0
- requirements.txt +33 -0
- src/AI/__init__.py +0 -0
- src/AI/ai_configs.py +84 -0
- src/AI/chatbot_demo.py +112 -0
- src/AI/embedding.py +323 -0
- src/AI/evaluation.py +101 -0
- src/AI/klever_search.py +73 -0
- src/AI/parsing.py +7 -0
- src/AI/search.py +219 -0
- src/AI/training.py +95 -0
- src/__init__.py +0 -0
- src/backend/TTChatBot/.sample-env +24 -0
- src/backend/TTChatBot/chatbot/__init__.py +0 -0
- src/backend/TTChatBot/chatbot/admin.py +3 -0
- src/backend/TTChatBot/chatbot/apps.py +242 -0
- src/backend/TTChatBot/chatbot/exceptions.py +10 -0
- src/backend/TTChatBot/chatbot/migrations/__init__.py +0 -0
- src/backend/TTChatBot/chatbot/serializers.py +25 -0
- src/backend/TTChatBot/chatbot/tasks.py +79 -0
- src/backend/TTChatBot/chatbot/urls.py +31 -0
- src/backend/TTChatBot/chatbot/utils.py +108 -0
- src/backend/TTChatBot/chatbot/views.py +199 -0
- src/backend/TTChatBot/config/__init__.py +3 -0
- src/backend/TTChatBot/config/asgi.py +16 -0
- src/backend/TTChatBot/config/celery.py +24 -0
- src/backend/TTChatBot/config/settings/__init__.py +19 -0
- src/backend/TTChatBot/config/settings/common.py +132 -0
- src/backend/TTChatBot/config/settings/local.py +98 -0
- src/backend/TTChatBot/config/settings/prod.py +98 -0
- src/backend/TTChatBot/config/settings/staging.py +0 -0
- src/backend/TTChatBot/config/urls.py +62 -0
- src/backend/TTChatBot/config/wsgi.py +16 -0
- src/backend/TTChatBot/manage.py +22 -0
- src/backend/TTChatBot/storage/.gitkeep +0 -0
- src/frontend/.gitkeep +0 -0
- src/frontend/.prettierignore +1 -0
- src/frontend/.prettierrc +4 -0
- src/frontend/.sample-env +1 -0
- src/frontend/Dockerfile +22 -0
- src/frontend/environments/dev/build.args +1 -0
- src/frontend/environments/prod/build.args +1 -0
- src/frontend/next-env.d.ts +5 -0
- src/frontend/next.config.js +11 -0
- src/frontend/package-lock.json +0 -0
- src/frontend/package.json +30 -0
- src/frontend/postcss.config.js +6 -0
- src/frontend/public/favicon.webp +0 -0
- src/frontend/public/locales/en.ts +15 -0
.gitignore
ADDED
@@ -0,0 +1,251 @@
# For experiment of counting tokens
src/AI/tokens.py

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# ignore training data
src/data/tt-content-postprocess
src/data/tt-content-debug
src/data/tt-content
src/scraper/tt-content-postprocess
src/scraper/tt-content-debug
src/scraper/tt-content
src/scraper/tt-klever-content

# staticfile
src/backend/TTChatBot/staticfiles/
src/backend/TTChatBot/static/

# MacOS
.DS_Store

# Frontend
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
src/frontend/node_modules/
/.pnp
.pnp.js

# next.js
src/frontend/.next/
/out/

# misc
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# vercel
.vercel

# Pycharm
.idea

# Env vars - to be updated
/infra/ci/secret.yaml

# Local .terraform directories
**/.terraform/*

# .tfstate files
*.tfstate
*.tfstate.*

# Exclude all .tfvars files, which are likely to contain sensitive data, such as
# password, private keys, and other secrets. These should not be part of version
# control as they are data points which are potentially sensitive and subject
# to change depending on the environment.
*.tfvars
*.tfvars.json

# Ignore override files as they are usually used to override resources locally and so
# are not checked in
override.tf
override.tf.json
*_override.tf
*_override.tf.json

# Include override files you do wish to add to version control using negated pattern
# !example_override.tf

# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
# example: *tfplan*

# Ignore CLI configuration files
.terraformrc
terraform.rc
.terraform.lock.hcl
# ignore .vscode
.vscode

# Ignore sensitive data - k8s env vars
infra/environments/chatbot-dev/dev_secret.yaml
infra/environments/chatbot-prod/prod_secret.yaml
infra/environments/tt-chatbot-prod/prod_secret.yaml

# yarn file
yarn.lock
models/freemind/embeddings/freemind.csv
ADDED
The diff for this file is too large to render.
requirements.txt
ADDED
@@ -0,0 +1,33 @@
# AI packages
gradio==3.40.1  # for demo with interface
llama-index==0.7.4  # enables the LLM to access the outside information that serves as our knowledge base
openai==0.27.8  # for generating embeddings
pandas==2.0.3  # for DataFrames to store article sections and embeddings
scipy==1.11.1  # for calculating vector similarities for search
tiktoken==0.4.0  # for counting tokens
typing_extensions==4.5.0  # pinned to avoid langchain's "TypeError: issubclass() arg 1 must be a class"

# BE/FE packages
requests==2.31.0
tqdm==4.65.0
django==4.2.4
python-dotenv==1.0.0
beautifulsoup4==4.12.2
# django rest API
djangorestframework==3.14.0
drf-yasg==1.21.7
celery==5.3.1
django-celery-results==2.5.1
psycopg2-binary==2.9.6
# message broker for celery
redis==4.6.0
# threading library for celery
gevent==23.7.0
# production
gunicorn==21.2.0
# cors
django-cors-headers==4.2.0
# convert html to text
html2text==2020.1.16
# serving staticfiles without using nginx
whitenoise==6.5.0
src/AI/__init__.py
ADDED
File without changes
src/AI/ai_configs.py
ADDED
@@ -0,0 +1,84 @@
"""
Author: Khanh Phan
Date: 2023-04-20
"""
import os
import sys

# MODEL PARAMETERS: https://platform.openai.com/docs/models/gpt-3-5
MODEL_NAME = "gpt-3.5-turbo"  # Must select from MODEL_NAMES
MODEL_NAMES = ["gpt-4", "text-davinci-003", "gpt-3.5-turbo"]
EMBEDDING_MODEL = (
    "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023
)

# CHATBOT SERVICE
SERVICE = "freemind"  # Must select from SERVICES
SERVICES = ["TokyoTechies", "Klever", "Test", "freemind"]

# DATA FORMATTING
DELIMITER_TOKYOTECHIES = "Sub Section:"
FILE_TYPE = ".txt"
FILE_ENCODING = "utf-8"
INTRODUCTION_MESSAGE = (
    f"You are a chatbot of {SERVICE}. "
    f"Use the below articles on the {SERVICE} to answer the subsequent question. "  # noqa: E501
    "If an answer cannot be found in the articles, write sorry that I cannot answer your request, please contact our support team for further assistance."  # noqa: E501
    r'If an answer is found, add embedding title in this format "[Title](URL)" to the end of an answer and ignore the same title.'  # noqa: E501
)
SYSTEM_CONTENT = f"You answer questions about {SERVICE}"

# CALCULATE EMBEDDING PARAMETERS
MAX_TOKENS = 1600  # maximum tokens for a section
BATCH_SIZE = 1000  # up to 2048 embedding inputs per request
TOKEN_BUDGET = 4096 - 500

# TRAINING PARAMETERS
CONTEXT_WINDOW = 4096  # Context window for the LLM.
NUM_OUTPUTS = 512  # Number of outputs for the LLM.
CHUNK_OVERLAP_RATIO = 0.1  # Chunk overlap as a ratio of chunk size
TEMPERATURE = 0.0  # A parameter that controls the "creativity" or
# randomness of the text generated. A higher temperature (e.g., 0.7)
# results in more diverse and creative output, while a lower temperature
# (e.g., 0.2) makes the output more deterministic and focused.

sys.path.append(os.path.abspath(os.path.join("..", "data")))

# PATH
if SERVICE in SERVICES:
    if MODEL_NAME in MODEL_NAMES:
        # Path to training files:
        FOLDERPATH_DOCUMENTS = os.path.join(
            "data",
            SERVICE,
            "training_files",
        )
        # Path to model
        FOLDERPATH_INDEXES = os.path.join(
            "models",
            SERVICE,
            MODEL_NAME,
        )
        FILEPATH_EMBEDDINGS = os.path.join(
            "models",
            SERVICE,
            "embeddings",
            f"{SERVICE}.csv",
        )
        # For evaluation
        FOLDERPATH_QUESTION = os.path.join(
            "data",
            SERVICE,
            "evaluation",
            "questions",
        )
        FOLDERPATH_QA = os.path.join(
            "data",
            SERVICE,
            "evaluation",
            "QA_" + MODEL_NAME,
        )
    else:
        raise ValueError("MODEL_NAME must be in MODEL_NAMES")
else:
    raise ValueError("SERVICE must be in SERVICES")
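For reference, with the committed defaults (SERVICE = "freemind", MODEL_NAME = "gpt-3.5-turbo") the path block above resolves to the locations the rest of this commit uses; a minimal sketch that just derives them, nothing here is hard-coded elsewhere:

# Sketch: the paths ai_configs.py derives for the committed defaults.
import os

SERVICE = "freemind"
MODEL_NAME = "gpt-3.5-turbo"

print(os.path.join("data", SERVICE, "training_files"))  # data/freemind/training_files
print(os.path.join("models", SERVICE, MODEL_NAME))      # models/freemind/gpt-3.5-turbo
print(os.path.join("models", SERVICE, "embeddings", f"{SERVICE}.csv"))
# models/freemind/embeddings/freemind.csv — the CSV added in this commit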
src/AI/chatbot_demo.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Author: Khanh Phan
|
3 |
+
Date: 2023-04-20
|
4 |
+
"""
|
5 |
+
import configparser
|
6 |
+
import os
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
import openai
|
10 |
+
from ai_configs import (
|
11 |
+
FOLDERPATH_INDEXES,
|
12 |
+
MODEL_NAME,
|
13 |
+
)
|
14 |
+
from llama_index import (
|
15 |
+
StorageContext,
|
16 |
+
load_index_from_storage,
|
17 |
+
)
|
18 |
+
|
19 |
+
env = configparser.ConfigParser()
|
20 |
+
env.read(".env")
|
21 |
+
os.environ["OPENAI_API_KEY"] = env["OpenAI"]["OPENAI_KEY_TT"]
|
22 |
+
openai.api_key = os.environ["OPENAI_API_KEY"]
|
23 |
+
|
24 |
+
|
25 |
+
def format_response(responses: dict):
|
26 |
+
"""
|
27 |
+
(Optional) Format one or multiple responses from version(s) of chatbot
|
28 |
+
|
29 |
+
Parameters:
|
30 |
+
responses (dict): chatbot response with the name of model
|
31 |
+
|
32 |
+
Returns:
|
33 |
+
output (str): formatted reponse
|
34 |
+
"""
|
35 |
+
output = ""
|
36 |
+
for response in responses:
|
37 |
+
output += response + (responses[response]) + "\n\n"
|
38 |
+
return output
|
39 |
+
|
40 |
+
|
41 |
+
def chat(message, history):
|
42 |
+
"""
|
43 |
+
Load index to chatbot and get response
|
44 |
+
|
45 |
+
Parameters:
|
46 |
+
message (dict): question to chatbot
|
47 |
+
history (list): history of the whole conversation
|
48 |
+
|
49 |
+
Returns:
|
50 |
+
history (list): history of the whole conversation (for displaying)
|
51 |
+
history (list): state of the chatbot
|
52 |
+
"""
|
53 |
+
history = history or []
|
54 |
+
# rebuild storage context
|
55 |
+
FOLDERPATH_INDEXES_EN = FOLDERPATH_INDEXES + "_en"
|
56 |
+
storage_context = StorageContext.from_defaults(
|
57 |
+
persist_dir=FOLDERPATH_INDEXES_EN,
|
58 |
+
)
|
59 |
+
|
60 |
+
# load index to memory
|
61 |
+
index = load_index_from_storage(storage_context)
|
62 |
+
|
63 |
+
# open QA engine
|
64 |
+
query_engine = index.as_query_engine()
|
65 |
+
|
66 |
+
# Get the response from OpenAI
|
67 |
+
response_en = query_engine.query(message)
|
68 |
+
print("Q: ", message)
|
69 |
+
print("A: ", response_en.response, "\n")
|
70 |
+
|
71 |
+
# ---------- JAPANESE
|
72 |
+
# rebuild storage context
|
73 |
+
FOLDERPATH_INDEXES_JA = FOLDERPATH_INDEXES + "_ja"
|
74 |
+
storage_context_ja = StorageContext.from_defaults(
|
75 |
+
persist_dir=FOLDERPATH_INDEXES_JA,
|
76 |
+
)
|
77 |
+
|
78 |
+
# load index to memory
|
79 |
+
index_ja = load_index_from_storage(storage_context_ja)
|
80 |
+
|
81 |
+
# open QA engine
|
82 |
+
query_engine_ja = index_ja.as_query_engine()
|
83 |
+
|
84 |
+
# Get the response from OpenAI
|
85 |
+
response_ja = query_engine_ja.query(message)
|
86 |
+
print("Q: ", message)
|
87 |
+
print("A: ", response_ja.response, "\n")
|
88 |
+
######
|
89 |
+
|
90 |
+
# Format the response
|
91 |
+
responses = {
|
92 |
+
f"---{MODEL_NAME} (English)---": response_en.response,
|
93 |
+
f"---{MODEL_NAME} (Japanese)---": response_ja.response,
|
94 |
+
}
|
95 |
+
|
96 |
+
response = format_response(responses)
|
97 |
+
|
98 |
+
# Append the response to history (to show in the UI)
|
99 |
+
history.append((message, response))
|
100 |
+
|
101 |
+
return history, history
|
102 |
+
|
103 |
+
|
104 |
+
# Call the chat using gradio which supports UI for chatbot and is shareable
|
105 |
+
chatgpt = gr.Interface(
|
106 |
+
chat,
|
107 |
+
["text", "state"],
|
108 |
+
["chatbot", "state"],
|
109 |
+
allow_flagging="never",
|
110 |
+
)
|
111 |
+
|
112 |
+
chatgpt.launch(share=True) # share=True to share the chat publicly
|
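Note that chat() above rebuilds both storage contexts and reloads both indexes on every message. A minimal sketch, not part of this commit, of caching the two query engines once at import time instead (the on-disk indexes do not change between messages); it reuses FOLDERPATH_INDEXES, MODEL_NAME, and format_response from the module above:

# Sketch (assumption): load both query engines once instead of per message.
from llama_index import StorageContext, load_index_from_storage

_ENGINES = {}
for suffix in ("_en", "_ja"):
    ctx = StorageContext.from_defaults(persist_dir=FOLDERPATH_INDEXES + suffix)
    _ENGINES[suffix] = load_index_from_storage(ctx).as_query_engine()


def chat_cached(message, history):
    history = history or []
    responses = {
        f"---{MODEL_NAME} (English)---": _ENGINES["_en"].query(message).response,
        f"---{MODEL_NAME} (Japanese)---": _ENGINES["_ja"].query(message).response,
    }
    history.append((message, format_response(responses)))
    return history, history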
src/AI/embedding.py
ADDED
@@ -0,0 +1,323 @@
"""
Author: Khanh Phan
Date: 2023-07-20
"""

import configparser
import os

import openai
import pandas as pd
import tiktoken
from ai_configs import (
    BATCH_SIZE,
    DELIMITER_TOKYOTECHIES,
    EMBEDDING_MODEL,
    FILE_ENCODING,
    FILE_TYPE,
    FILEPATH_EMBEDDINGS,
    FOLDERPATH_DOCUMENTS,
    MAX_TOKENS,
    MODEL_NAME,
    SERVICE,
)


def list_files(directory: str) -> list:
    files = []
    for file in os.listdir(directory):
        # check only text files
        if file.endswith(FILE_TYPE):
            files.append(file)
    return files


def read_file(file_path: str) -> str:
    # Open a file: file
    file = open(file_path, encoding=FILE_ENCODING)

    # read all lines at once
    file_content = file.read()

    # close the file
    file.close()
    return file_content


def num_tokens(text: str, model: str = MODEL_NAME) -> int:
    """Return the number of tokens in a string."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))


def truncated_string(
    string: str,
    model: str,
    max_tokens: int,
    print_warning: bool = True,
) -> str:
    """Truncate a string to a maximum number of tokens."""
    encoding = tiktoken.encoding_for_model(model)
    encoded_string = encoding.encode(string)
    truncated_string = encoding.decode(encoded_string[:max_tokens])
    if print_warning and len(encoded_string) > max_tokens:
        print(
            f"Warning: Truncated string from {len(encoded_string)} tokens to {max_tokens} tokens.",  # noqa: E501
        )
    return truncated_string


def determine_delimiter(
    strings: str,
    service: str = SERVICE,
) -> str:
    """
    Determine the delimiter of the file
    """
    if service == "TokyoTechies":
        return DELIMITER_TOKYOTECHIES
    elif service == "Klever":
        if "# " in strings:
            return "# "
        elif "## " in strings:
            return "## "
        elif "### " in strings:
            return "### "
        else:
            # No known delimiter; the caller treats a falsy value as
            # "do not split" (see format_content_klever).
            return False
    elif service == "freemind":
        return "-----"
    else:
        raise ValueError(f"Unknown service: {service}")


def format_content_Tokyo_Techies(
    strings: str,
    content: str,
    max_tokens: int = 1000,
    model: str = MODEL_NAME,
):
    """
    Format content for Tokyo Techies
    """
    chunks = content.split(determine_delimiter(content))
    # TODO: add to config
    if "URL:" in chunks[0] and "Language:" in chunks[0]:
        url = (
            "<url>"
            + (content.split("URL:"))[1].split("Language")[0].strip()
            + "</url>"
        )  # get url
    else:
        url = "<url>No URL</url>"

    for chunk in chunks[1:]:
        chunk = (
            chunk.strip()
        )  # remove leading and trailing whitespace and newline
        if not chunk:
            continue

        # get section title (first row) and content (from 2nd row)
        section_title = chunk.split("\n")[0]
        titles = [url, section_title]
        section_content = chunk.split("\n")[1:]
        section_content = "\n".join(section_content)

        if num_tokens(section_content) > max_tokens:
            print(
                f"{titles} ({num_tokens(section_content)}) has more than {max_tokens} tokens",  # noqa: E501
            )
            section_content = truncated_string(
                section_content,
                model=model,
                max_tokens=max_tokens,
            )

        string = "\n\n".join(titles + [section_content])
        strings.extend([string])
        print(string)
    return strings


def format_content_klever(
    strings: str,
    content: str,
    max_tokens: int = 1000,
    model: str = MODEL_NAME,
):
    """
    Format content for Klever
    """

    # Add images tag to image link
    # NOTE: the arguments of this replace() are cut off in the rendered diff.
    content = content.replace("

    delimiter = determine_delimiter(content)
    if delimiter:
        chunks = content.split(delimiter)
    else:
        chunks = [content]

    # TODO: add to config
    url = ""
    if "Title:" in chunks[0] and "URL:" in chunks[0]:
        title = "Title: " + (
            (content.split("Title:"))[1].split("URL")[0].strip()
        )
        if "Language:" in chunks[0]:
            url = (
                "<url>"
                + (content.split("URL:"))[1].split("Language:")[0].strip()
                + "</url>"
            )
    else:
        title = ""

    # Extract content between title and the first sub-section
    section_content = (chunks[0].split("-----"))[1].strip()
    if section_content != "":
        titles = [title, url]
        string = "\n\n".join(titles + [section_content])
        # print(f"----------\n{string}\n")
        strings.extend([string])

    # Extract content in every sub-section
    for chunk in chunks[1:]:
        chunk = (
            chunk.strip()
        )  # remove leading and trailing whitespace and newline
        if not chunk:
            continue

        # get section title (first row) and content (from 2nd row)
        section_title = chunk.split("\n")[0]
        titles = [title + " > " + section_title, url]
        section_content = chunk.split("\n")[1:]
        section_content = "\n".join(section_content)

        if num_tokens(section_content) > max_tokens:
            print(
                f"{titles} ({num_tokens(section_content)}) has more than {max_tokens} tokens",  # noqa: E501
            )
            section_content = truncated_string(
                section_content,
                model=model,
                max_tokens=max_tokens,
            )

        string = "\n\n".join(titles + [section_content])
        # print(f"----------\n{string}\n")
        strings.extend([string])
    return strings


def format_content_freemind(
    strings: str,
    content: str,
    max_tokens: int = 1000,
    model: str = MODEL_NAME,
):
    """
    Format content for freemind
    """
    chunks = content.split(determine_delimiter(content))
    for chunk in chunks:
        chunk = (
            chunk.strip()
        )  # remove leading and trailing whitespace and newline
        if not chunk:
            continue

        if num_tokens(chunk) > max_tokens:
            print(
                f"{chunk} ({num_tokens(chunk)}) has more than {max_tokens} tokens",  # noqa: E501
            )
            # truncate the chunk itself (the original referenced an
            # undefined section_content here)
            chunk = truncated_string(
                chunk,
                model=model,
                max_tokens=max_tokens,
            )

        string = chunk
        # print(f"----------\n{string}\n")
        strings.extend([string])
    return strings


def format_content(
    directory: str,
    max_tokens: int = 1000,
    model: str = MODEL_NAME,
) -> list[str]:
    strings = []

    # read files
    files = list_files(directory)
    for file in files:
        print(f"File: {file}")
        file_content = read_file(
            os.path.join(
                FOLDERPATH_DOCUMENTS,
                file,
            ),
        )

        if SERVICE == "TokyoTechies":
            strings = format_content_Tokyo_Techies(
                strings,
                file_content,
                max_tokens,
                model,
            )
        elif SERVICE == "Klever":
            strings = format_content_klever(
                strings,
                file_content,
                max_tokens,
                model,
            )
        elif SERVICE == "freemind":
            strings = format_content_freemind(
                strings,
                file_content,
                max_tokens,
                model,
            )

    return strings


def embed_data():
    # read config
    env = configparser.ConfigParser()
    env.read(".env")
    os.environ["OPENAI_API_KEY"] = env["OpenAI"]["OPENAI_KEY_TT"]
    openai.api_key = os.environ["OPENAI_API_KEY"]

    formatted_strings = format_content(FOLDERPATH_DOCUMENTS, MAX_TOKENS)

    embeddings = []
    for batch_start in range(0, len(formatted_strings), BATCH_SIZE):
        batch_end = batch_start + BATCH_SIZE
        batch = formatted_strings[batch_start:batch_end]
        print(f"Batch {batch_start} to {batch_end-1}")
        response = openai.Embedding.create(model=EMBEDDING_MODEL, input=batch)
        for i, be in enumerate(response["data"]):
            assert (
                i == be["index"]
            )  # double check embeddings are in same order as input
        batch_embeddings = [e["embedding"] for e in response["data"]]
        embeddings.extend(batch_embeddings)

    df = pd.DataFrame({"text": formatted_strings, "embedding": embeddings})

    # save document chunks and embeddings
    SAVE_PATH = FILEPATH_EMBEDDINGS
    df.to_csv(SAVE_PATH, index=False)


embed_data()
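For reference, a minimal sketch of reading back the CSV that embed_data() writes: each row holds the chunk text and its embedding, which pandas serializes as the string form of a Python list, so ast.literal_eval restores it (the same pattern search.py and klever_search.py use below):

# Sketch: load the embeddings CSV produced above.
import ast

import pandas as pd

df = pd.read_csv("models/freemind/embeddings/freemind.csv")
df["embedding"] = df["embedding"].apply(ast.literal_eval)
print(df.columns.tolist())      # ['text', 'embedding']
print(len(df["embedding"][0]))  # 1536 dimensions for text-embedding-ada-002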
src/AI/evaluation.py
ADDED
@@ -0,0 +1,101 @@
"""
Author: Khanh Phan
Date: 2023-06-20
"""
import configparser
import os

import openai
from ai_configs import (
    FOLDERPATH_INDEXES,
    FOLDERPATH_QA,
    FOLDERPATH_QUESTION,
)
from llama_index import (
    StorageContext,
    load_index_from_storage,
)

env = configparser.ConfigParser()
env.read(".env")
os.environ["OPENAI_API_KEY"] = env["OpenAI"]["OPENAI_KEY_TT"]
openai.api_key = os.environ["OPENAI_API_KEY"]


def get_question_files(path: str = FOLDERPATH_QUESTION) -> None:
    """
    Get the directory.
    Check whether it is valid (a folder or a text file) or invalid.
    If valid, generate the answers from the questions in the directory.

    Parameters:
        path (str): Path to a question file/folder

    Returns:
        None
    """
    if os.path.isdir(path) is True:
        for file in os.listdir(path):
            if file.endswith(".txt"):
                print("Generating answer from: ", os.path.join(path, file))
                generate_answers(os.path.join(path, file))
    elif os.path.isfile(path) is True and path.endswith(".txt"):
        print("Generating answer from:", path)
        generate_answers(path)
    else:
        raise Exception("Input is not a folder or a text file")


def generate_answers(
    file_directory: str,
    output_path=FOLDERPATH_QA,
) -> None:
    """
    Get the list of questions from file(s),
    then generate the answers and write them to file(s).
    These answers are used for evaluation.

    Parameters:
        file_directory (str): Path to a question file
        output_path (str): folder to write the answers

    Returns:
        None
    """

    # Load the questions
    question_file = open(file_directory)
    lines = question_file.readlines()

    # Create a file to write the answers
    file_name = os.path.basename(file_directory)
    qa_file = open(os.path.join(output_path, file_name), "w")

    count = 0
    for line in lines:  # for each question
        count += 1
        # generate the answer
        response = query_engine.query(line)

        # format the output
        question = "Q" + str(count) + ": " + str(line)
        answer = "A" + str(count) + ": " + str(response.response)

        response = question + answer + "\n"
        print(response)

        # write Q&A to file
        qa_file.writelines(response)

    question_file.close()
    qa_file.close()


# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir=FOLDERPATH_INDEXES)

# load index
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine()

get_question_files()
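Since generate_answers() treats every line of the input .txt as one standalone question, an evaluation file is simply one question per line. A hypothetical helper that creates such a file in the layout the script expects (the file name and questions are illustrative, not part of the commit):

# Hypothetical: write a sample questions file for evaluation.py.
import os

folder = os.path.join("data", "freemind", "evaluation", "questions")
os.makedirs(folder, exist_ok=True)
with open(os.path.join(folder, "sample.txt"), "w", encoding="utf-8") as f:
    f.write("What is Freemind?\n")
    f.write("How do I contact the support team?\n")
# Answers land in data/freemind/evaluation/QA_gpt-3.5-turbo/sample.txt
# as "Qn: ..." / "An: ..." pairs.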
src/AI/klever_search.py
ADDED
@@ -0,0 +1,73 @@
import ast  # for converting embeddings saved as strings back to arrays
import configparser
import os

import openai  # for calling the OpenAI API
import pandas as pd  # for storing text and embeddings data
from ai_configs import (
    EMBEDDING_MODEL,
    FILEPATH_EMBEDDINGS,
    MODEL_NAME,
)
from scipy import spatial  # for calculating vector similarities for search

env = configparser.ConfigParser()
env.read(".env")
os.environ["OPENAI_API_KEY"] = env["OpenAI"]["OPENAI_KEY_TT"]
openai.api_key = os.environ["OPENAI_API_KEY"]

model_name = MODEL_NAME
# Read embedding file
embedding_data = pd.read_csv(FILEPATH_EMBEDDINGS)
# Convert embeddings from CSV str type back to list type
embedding_data["embedding"] = embedding_data["embedding"].apply(
    ast.literal_eval,
)
print("Finished loading embedding data!")


# search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 3,
) -> tuple[list[str], list[float]]:
    """Returns a list of strings and relatednesses,
    sorted from most related to least.
    """
    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"]))
        for i, row in df.iterrows()
    ]
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)

    return strings[:top_n], relatednesses[:top_n]


query = "what is Klever?"
strings, relatedness = strings_ranked_by_relatedness(query, embedding_data)
for string in strings:
    if "</url>" in string:
        string = string.split("</url>")[0].replace("<url>", "URL: ")
    print(string)
    print("----------------")
print(relatedness)
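The default relatedness_fn is cosine similarity expressed as 1 minus scipy's cosine distance, so vectors pointing the same way score 1.0 and orthogonal vectors score 0.0. A toy check with 3-dimensional vectors, no API call needed:

# Toy check of the default relatedness_fn.
from scipy import spatial


def relatedness_fn(x, y):
    return 1 - spatial.distance.cosine(x, y)


print(relatedness_fn([1.0, 0.0, 0.0], [2.0, 0.0, 0.0]))  # 1.0 (same direction)
print(relatedness_fn([1.0, 0.0, 0.0], [0.0, 1.0, 0.0]))  # 0.0 (orthogonal)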
src/AI/parsing.py
ADDED
@@ -0,0 +1,7 @@
import aspose.words as aw

# Load the PDF document from the disc.
doc = aw.Document("TestDocument.pdf")

# Save the document as Markdown (the format is inferred from the extension).
doc.save("output.md")
src/AI/search.py
ADDED
@@ -0,0 +1,219 @@
import ast  # for converting embeddings saved as strings back to arrays
import configparser
import os
import time

import gradio as gr
import openai  # for calling the OpenAI API
import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
from ai_configs import (
    EMBEDDING_MODEL,
    FILEPATH_EMBEDDINGS,
    INTRODUCTION_MESSAGE,
    MODEL_NAME,
    SYSTEM_CONTENT,
    TOKEN_BUDGET,
)
from scipy import spatial  # for calculating vector similarities for search

env = configparser.ConfigParser()
env.read(".env")
os.environ["OPENAI_API_KEY"] = env["OpenAI"]["OPENAI_KEY_TT"]
openai.api_key = os.environ["OPENAI_API_KEY"]

# print(openai.api_key)  # debug only; avoid logging the API key

model_name = MODEL_NAME
# Read embedding file
embedding_data = pd.read_csv(FILEPATH_EMBEDDINGS)
# Convert embeddings from CSV str type back to list type
embedding_data["embedding"] = embedding_data["embedding"].apply(
    ast.literal_eval,
)
print("Finished loading embedding data!")


# search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 3,
) -> tuple[list[str], list[float]]:
    """Returns a list of strings and relatednesses,
    sorted from most related to least.
    """
    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"]))
        for i, row in df.iterrows()
    ]
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)

    return strings[:top_n], relatednesses[:top_n]


def num_tokens(text: str, model: str = MODEL_NAME) -> int:
    """Return the number of tokens in a string."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))


def query_message(
    query: str,
    df: pd.DataFrame,
    model: str,
    token_budget: int,
) -> str:
    """Return a message for GPT,
    with relevant source texts pulled from a dataframe.
    """
    strings, _ = strings_ranked_by_relatedness(query, df)

    """ example:
    # strings, relatednesses = strings_ranked_by_relatedness(
    #     "what solutions that TT provides?",
    #     df,
    #     top_n=5,
    # )
    # for string, relatedness in zip(strings, relatednesses):
    #     print(f"{relatedness=:.3f}\n{string}\n")
    """

    question = f"\n\nQuestion: {query}"
    message = INTRODUCTION_MESSAGE
    for string in strings:
        next_article = f"\nFreemind article section:\n--\n{string}\n--"
        if (
            num_tokens(message + next_article + question, model=model)
            > token_budget
        ):
            break
        else:
            message += next_article
    return message + question


def get_response(
    query: str,
    df: pd.DataFrame,
    model: str = MODEL_NAME,
    token_budget: int = TOKEN_BUDGET,
    print_message: bool = False,
) -> str:
    """Answers a query using GPT and a dataframe of
    relevant texts and embeddings.
    """
    message = query_message(query, df, model=model, token_budget=token_budget)

    if print_message:
        print(message)
    messages = [
        {"role": "system", "content": SYSTEM_CONTENT},
        {"role": "user", "content": message},
    ]

    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    response_message = response["choices"][0]["message"]["content"]
    print(f'Total used tokens: {response["usage"]["total_tokens"]}')
    return response_message, message


# Code for getting chatbot's response ends here. Below code is for UI only.
def format_response(responses: dict):
    """
    (Optional) Format one or multiple responses from version(s) of the chatbot

    Parameters:
        responses (dict): chatbot response with the name of the model

    Returns:
        output (str): formatted response
    """
    output = ""
    for response in responses:
        output += response + (responses[response]) + "\n\n"
    return output


with gr.Blocks() as chatgpt:
    chatbot = gr.Chatbot(label="Freemind Bot", height=500)
    message = gr.Textbox(
        label="Enter your chat here",
        placeholder="Press enter to send a message",
        show_copy_button=True,
    )
    radio = gr.Radio(
        [
            "Full model (most capable but slow & expensive)",
            "Lite model (Capable but fast & cheap)",
        ],
        label="Choose a chatbot model",
        value="Lite model (Capable but fast & cheap)",
    )
    clear = gr.Button("Clear all chat")

    def choice_model(choice):
        if choice == "Full model (most capable but slow & expensive)":
            return "gpt-4"
        else:
            return "gpt-3.5-turbo"

    def get_user_message(user_message, history):
        return "", history + [[user_message, None]]

    def show_response(history, model):
        message = history[-1][0]
        model = choice_model(model)
        print(f"model: {model}")
        # Get the response from OpenAI
        response, _ = get_response(
            query=message,
            df=embedding_data,
            model=model,
        )

        # Correct URL
        # This workaround will be removed after BE/FE fix this bug
        response = response.replace("help/document/", "wiki/1-")
        response = response.replace(">>", ">")
        print("Q: ", message, "\nA: ", response, "\n")

        # Format the response
        # responses = {
        #     f"[{MODEL_NAME}] → ": response,
        # }
        # response = format_response(responses)

        history[-1][1] = ""
        for character in response:
            history[-1][1] += character
            time.sleep(0.01)
            yield history

    message.submit(
        get_user_message,
        [message, chatbot],
        [message, chatbot],
        queue=False,
    ).then(
        show_response,
        [chatbot, radio],
        chatbot,
    )
    clear.click(lambda: None, None, chatbot, queue=False)


chatgpt.queue()
chatgpt.launch(share=True)  # share=True to share the chat publicly
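For reference, a minimal non-UI call through the pipeline above, bypassing Gradio entirely (the question string is illustrative):

# Sketch: call retrieval + chat directly.
answer, prompt = get_response(
    query="What is Freemind?",
    df=embedding_data,
    model="gpt-3.5-turbo",
    print_message=False,
)
print(answer)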
src/AI/training.py
ADDED
@@ -0,0 +1,95 @@
"""
Author: Khanh Phan
Date: 2023-04-20
"""

import configparser
import os

import openai
from ai_configs import (  # CHUNK_SIZE_LIMIT,
    CHUNK_OVERLAP_RATIO,
    CONTEXT_WINDOW,
    FOLDERPATH_DOCUMENTS,
    FOLDERPATH_INDEXES,
    MODEL_NAME,
    NUM_OUTPUTS,
    TEMPERATURE,
)
from langchain import OpenAI
from llama_index import (
    GPTVectorStoreIndex,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    SimpleDirectoryReader,
)

env = configparser.ConfigParser()
env.read(".env")
os.environ["OPENAI_API_KEY"] = env["OpenAI"]["OPENAI_KEY_TT"]
openai.api_key = os.environ["OPENAI_API_KEY"]


def construct_index(
    folderpath_documents: str,
    folderpath_index: str,
) -> GPTVectorStoreIndex:
    """
    Construct the index for all the documents.

    Parameters:
        folderpath_documents (str): Path to a training folder
        folderpath_index (str): Path to a folder to save the model

    Returns:
        document_index (GPTVectorStoreIndex): the model
    """

    # Create a prompt helper with initial parameters for the chatbot
    prompt_helper = PromptHelper(
        context_window=CONTEXT_WINDOW,
        num_output=NUM_OUTPUTS,
        chunk_overlap_ratio=CHUNK_OVERLAP_RATIO,
        # chunk_size_limit=CHUNK_SIZE_LIMIT,
    )

    # Configure the LLM provider and model.
    llm_predictor = LLMPredictor(
        llm=OpenAI(
            temperature=TEMPERATURE,
            model_name=MODEL_NAME,
            max_tokens=NUM_OUTPUTS,
        ),
    )

    # Create the service context
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )

    # Load the documents
    documents = SimpleDirectoryReader(folderpath_documents).load_data()

    # Generate the index from documents
    document_index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
    )

    # Save index to disk
    document_index.storage_context.persist(persist_dir=folderpath_index)

    return document_index


document_index = construct_index(FOLDERPATH_DOCUMENTS, FOLDERPATH_INDEXES)

"""
# These lines are for testing purposes only.
query = input("What do you want to ask? ")
query_engine = document_index.as_query_engine()
response = query_engine.query("what are the articles about?")
print(response)
"""
src/__init__.py
ADDED
File without changes
src/backend/TTChatBot/.sample-env
ADDED
@@ -0,0 +1,24 @@
ENV_NAME='local'

DJANGO_SETTINGS_MODULE=config.settings.local
DJANGO_SECRET_KEY='django-insecure-xfs(py=^axctf8(#5yd-svkffy3ft0u0z6^*&vx@g#)fttc#sl'
DJANGO_DEBUG=True

# Database
# DB_NAME='postgres'
# DB_USER='postgres'
# DB_PASSWORD='postgres'
# DB_HOST='127.0.0.1'
# DB_PORT=5678

# Celery
# CELERY_BROKER_URL = 'redis://localhost:6379/0'
# CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'

# Other API
OPEN_AI_KEY='KEY'

# Redis config
REDIS_HOST = redis
REDIS_PORT = 6380
BROKER_URL = redis://${REDIS_HOST}:${REDIS_PORT}/0
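requirements.txt pins python-dotenv, so the settings module named in this file presumably reads these values via load_dotenv; a minimal sketch of that pattern (an assumption, the actual settings files are not rendered in this diff):

# Sketch (assumption): pull .env values into Django settings via python-dotenv.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

DEBUG = os.getenv("DJANGO_DEBUG", "False") == "True"
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", "")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))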
src/backend/TTChatBot/chatbot/__init__.py
ADDED
File without changes
src/backend/TTChatBot/chatbot/admin.py
ADDED
@@ -0,0 +1,3 @@
# from django.contrib import admin

# Register your models here.
src/backend/TTChatBot/chatbot/apps.py
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ast
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
|
5 |
+
import openai
|
6 |
+
import pandas as pd
|
7 |
+
from django.apps import AppConfig
|
8 |
+
from django.conf import settings
|
9 |
+
from llama_index import (
|
10 |
+
StorageContext,
|
11 |
+
load_index_from_storage,
|
12 |
+
)
|
13 |
+
from scipy import spatial
|
14 |
+
|
15 |
+
from .utils import num_tokens_from_messages
|
16 |
+
|
17 |
+
# set OpenAI API key
|
18 |
+
openai.api_key = os.environ["OPENAI_API_KEY"]
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
|
22 |
+
def load_chatgpt_index(apps_names: str, index_file: str):
|
23 |
+
"""Functions to load chatGPT index
|
24 |
+
|
25 |
+
Args:
|
26 |
+
apps_names (str): TokyoTechies or Klever
|
27 |
+
index_file (str): Storage index path
|
28 |
+
"""
|
29 |
+
# build storage context
|
30 |
+
logger.info("Building %s storage context", apps_names)
|
31 |
+
storage_context = StorageContext.from_defaults(
|
32 |
+
persist_dir=index_file,
|
33 |
+
)
|
34 |
+
|
35 |
+
# load index
|
36 |
+
index = load_index_from_storage(storage_context)
|
37 |
+
|
38 |
+
query_engine = index.as_query_engine()
|
39 |
+
logger.info("Loading index from %s storage completed", apps_names)
|
40 |
+
return query_engine
|
41 |
+
|
42 |
+
|
43 |
+
class ChatGPTEmbeddingSearchBased:
|
44 |
+
"""ChatGPT embedding search based method for Kleverbot"""
|
45 |
+
|
46 |
+
def __init__(
|
47 |
+
self,
|
48 |
+
service,
|
49 |
+
embedding_model,
|
50 |
+
chat_model,
|
51 |
+
filepath_embedding,
|
52 |
+
):
|
53 |
+
        self.embedding_model = embedding_model
        self.chat_model = chat_model
        self.filepath_embedding = filepath_embedding
        self.service = service
        # model related config
        self.token_budget = settings.TOKEN_BUDGET
        self.introduction_message = settings.INTRODUCTION_MESSAGE.format(
            service=self.service,
        )
        self.system_content = settings.SYSTEM_CONTENT.format(
            service=self.service,
        )
        self.next_article = settings.NEXT_ARTICLE
        self.embedding_data = self.load_embedding_data()

    def load_embedding_data(self):
        """Load embedding data from CSV."""
        df_data = pd.read_csv(self.filepath_embedding)
        df_data["embedding"] = df_data["embedding"].apply(
            ast.literal_eval,
        )
        logger.info(
            "Loading embeddings from %s storage completed",
            self.filepath_embedding,
        )
        return df_data

    # search function
    def strings_ranked_by_relatedness(
        self,
        query: str,
        df_data: pd.DataFrame,
        relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
        top_n: int = 3,
    ) -> tuple[list[str], list[float]]:
        """Return a list of strings and relatednesses,
        sorted from most related to least.
        """
        query_embedding_response = openai.Embedding.create(
            model=self.embedding_model,
            input=query,
        )
        query_embedding = query_embedding_response["data"][0]["embedding"]
        strings_and_relatednesses = [
            (row["text"], relatedness_fn(query_embedding, row["embedding"]))
            for _, row in df_data.iterrows()
        ]
        strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
        strings, relatednesses = zip(*strings_and_relatednesses)

        return strings[:top_n], relatednesses[:top_n]

    def query_message(
        self,
        query: str,
        dataframe: pd.DataFrame,
        model: str,
        token_budget: int,
    ) -> str:
        """Return a message for GPT,
        with relevant source texts pulled from a dataframe.
        """
        strings, _ = self.strings_ranked_by_relatedness(query, dataframe)
        question = f"\n\nQuestion: {query}"
        message = self.introduction_message
        for string in strings:
            next_article = self.next_article.format(
                service=self.service,
                string=string,
            )
            if (
                num_tokens_from_messages(
                    message + next_article + question,
                    model=model,
                )
                > token_budget
            ):
                break
            else:
                message += next_article
        return message + question

    def get_response(
        self,
        query: str,
        data: pd.DataFrame,
        model: str = "gpt-3.5-turbo",
        token_budget: int = 4096 - 500,
        log_message: bool = False,
    ):
        """Answer a query using GPT and a dataframe of
        relevant texts and embeddings.
        """
        message = self.query_message(
            query=query,
            dataframe=data,
            model=model,
            token_budget=token_budget,
        )

        if log_message:
            logging.info(message)

        messages = [
            {"role": "system", "content": self.system_content},
            {"role": "user", "content": message},
        ]

        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=0,
        )
        response_message = response["choices"][0]["message"]["content"]

        if log_message:
            logging.info(
                "Total used tokens: %s",
                response["usage"]["total_tokens"],
            )

        return response_message, message

    def chat(self, message):
        """Send a message to the chatbot and return the response from OpenAI"""
        res, _ = self.get_response(
            query=message,
            data=self.embedding_data,
            model=self.chat_model,
            token_budget=self.token_budget,
        )

        # TODO: fix this one after Klever FE fix -> scraper
        return (
            res.replace("help/document/", "wiki/1-")
            .replace(">>", ">")
            .replace("https://tokyotechies.kleversuite.net", "{ORG_URL}")
        )


class TTChatbotConfig(AppConfig):
    """TokyoTechies Chatbot Init"""

    name = "chatbot"
    label = "tt_chatbot"

    # old method
    # INDEXES_FILE = os.path.join(
    #     settings.TT_MODELS_PATH,
    #     settings.TT_MODEL_NAME,
    # )

    # QUERY_ENGINE = load_chatgpt_index(
    #     apps_names="TokyoTechies",
    #     index_file=INDEXES_FILE,
    # )

    # new method
    QUERY_ENGINE = ChatGPTEmbeddingSearchBased(
        service="TokyoTechies",
        embedding_model=settings.TT_EMBEDDING_MODEL,
        chat_model=settings.TT_EMBEDDING_CHAT_MODEL,
        filepath_embedding=settings.TT_FILEPATH_EMBEDDING,
    )


class KleverChatbotConfig(AppConfig):
    """Klever Chatbot Init"""

    name = "chatbot"
    label = "klever_chatbot"

    # old method
    # INDEXES_FILE = os.path.join(
    #     settings.KLEVER_MODELS_PATH,
    #     settings.KLEVER_MODEL_NAME,
    # )

    # QUERY_ENGINE = load_chatgpt_index(
    #     apps_names="Klever",
    #     index_file=INDEXES_FILE,
    # )

    # new method
    QUERY_ENGINE = ChatGPTEmbeddingSearchBased(
        service="Klever",
        embedding_model=settings.KLEVER_EMBEDDING_MODEL,
        chat_model=settings.KLEVER_EMBEDDING_CHAT_MODEL,
        filepath_embedding=settings.KLEVER_FILEPATH_EMBEDDING,
    )
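For reference, a minimal smoke-test sketch of exercising this engine directly; it is not part of the commit. It assumes OPENAI_API_KEY is set, that Django settings are importable, and that the embedding CSVs referenced in settings exist (importing chatbot.apps constructs both QUERY_ENGINEs at import time).

# Hypothetical smoke test (not part of the commit).
import os

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
django.setup()

from chatbot.apps import TTChatbotConfig  # noqa: E402

# chat() embeds the query, ranks the stored article chunks by cosine
# similarity, builds a prompt within TOKEN_BUDGET, and asks the chat model.
print(TTChatbotConfig.QUERY_ENGINE.chat("What services does Tokyo Techies offer?"))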
src/backend/TTChatBot/chatbot/exceptions.py
ADDED
@@ -0,0 +1,10 @@
class TTChatBotConnectException(Exception):
    pass


class TTChatBotEngineException(Exception):
    pass


class ChatbotVersionException(Exception):
    pass
src/backend/TTChatBot/chatbot/migrations/__init__.py
ADDED
File without changes
src/backend/TTChatBot/chatbot/serializers.py
ADDED
@@ -0,0 +1,25 @@
from rest_framework import serializers


class ConversationSerializer(serializers.Serializer):
    """Conversation content when the user interacts with the chatbot"""

    user_chat = serializers.CharField(required=True, max_length=1000)


class MessageSerializer(ConversationSerializer):
    """Response content when the chatbot answers the user"""

    chatbot_answer = serializers.CharField(required=True, max_length=1000)


class TaskSerializer(ConversationSerializer):
    """Task reference returned for an asynchronous chatbot request"""

    task_id = serializers.CharField(required=True)


class VersionSerializer(serializers.Serializer):
    """Trained version of the chatbot"""

    version = serializers.CharField(required=True)
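These are plain (non-model) DRF serializers, so validation is just field checks. A quick sketch of how one validates a payload, assuming nothing beyond the definitions above; it is not part of the commit.

# Hypothetical validation sketch (not part of the commit).
from chatbot.serializers import ConversationSerializer

payload = ConversationSerializer(data={"user_chat": "Hello"})
payload.is_valid(raise_exception=True)  # enforces required + max_length=1000
print(payload.validated_data["user_chat"])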
src/backend/TTChatBot/chatbot/tasks.py
ADDED
@@ -0,0 +1,79 @@
import logging

from celery import shared_task
from celery.utils.log import get_task_logger
from django.conf import settings

from .apps import (
    KleverChatbotConfig,
    TTChatbotConfig,
)
from .exceptions import (
    TTChatBotConnectException,
    TTChatBotEngineException,
)
from .utils import num_tokens_from_messages

# TODO: fix the logger not writing into the log file
logger = get_task_logger(__name__)


def tt_sync_chat_website(message):
    return _chat_tt(message, engine=TTChatbotConfig)


@shared_task(max_retries=0)
def tt_async_chat_website(message):
    max_token = settings.MAX_TOKEN

    if (
        num_tokens_from_messages(
            messages=message,
            model=settings.TT_MODEL_NAME,
        )
        >= max_token
    ):
        logging.warning(
            "Maximum token %s reached for user messages: %s",
            max_token,
            message,
        )
        return settings.MAX_TOKEN_RESPONSE
    else:
        return _chat_tt(message, TTChatbotConfig)


@shared_task(max_retries=0)
def tt_async_chat_klever(message):
    max_token = settings.MAX_TOKEN

    if (
        num_tokens_from_messages(
            messages=message,
            model=settings.KLEVER_MODEL_NAME,
        )
        >= max_token
    ):
        logging.warning(
            "Maximum token %s reached for user messages: %s",
            max_token,
            message,
        )
        return settings.MAX_TOKEN_RESPONSE
    else:
        return _chat_tt(message, KleverChatbotConfig)


def _chat_tt(message, engine=None):
    try:
        # TODO: retry the query a number of times when disconnected
        if engine:
            response = engine.QUERY_ENGINE.chat(message)
            return response
        else:
            raise TTChatBotEngineException("Connect engine failed")
    except Exception as exc:
        logging.error("OpenAI error: ", exc_info=exc)
        raise TTChatBotConnectException(
            "Connect OpenAI engine failed",
        ) from exc
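A minimal sketch of dispatching one of these Celery tasks and collecting the result; it is not part of the commit and assumes a worker is running against the Redis broker configured in settings.

# Hypothetical dispatch sketch (not part of the commit).
import time

from chatbot.tasks import tt_async_chat_website

result = tt_async_chat_website.delay("What is Tokyo Techies?")
while not result.ready():  # poll until the worker finishes
    time.sleep(0.5)
print(result.get())  # the chatbot answer, or MAX_TOKEN_RESPONSE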
src/backend/TTChatBot/chatbot/urls.py
ADDED
@@ -0,0 +1,31 @@
from django.urls import path

# from rest_framework.urlpatterns import format_suffix_patterns
from . import views

app_name = "chatbot"

urlpatterns = [
    # List and create conversations
    path("chat/sync", views.ConversationSyncView.as_view()),
    path(
        "chat/tokyotechies/async",
        views.TokyoTechiesConversationAsyncView.as_view(),
    ),
    path("chat/klever/async", views.KleverConversationAsyncView.as_view()),
    path(
        "chat/<str:task_id>/",
        views.ChatTaskStatus.as_view(),
        name="gpt_task_status",
    ),
    path(
        "chat/tokyotechies/version",
        views.TTBotVerion.as_view(),
        name="ttbot_version",
    ),
    path(
        "chat/klever/version",
        views.KleverBotVerion.as_view(),
        name="kleverbot_version",
    ),
]
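Together with the views defined later in chatbot/views.py, these routes give the following request flow: POST a question to an async endpoint, then poll the task endpoint with the returned task_id. A client sketch using requests; it is not part of the commit, and the localhost host is an assumption.

# Hypothetical client sketch (not part of the commit); assumes the
# backend is reachable on localhost:8000.
import time

import requests

BASE = "http://localhost:8000/api/v1"

task = requests.post(
    f"{BASE}/chat/tokyotechies/async",
    json={"user_chat": "Hello"},
).json()
while True:
    state = requests.get(f"{BASE}/chat/{task['task_id']}/").json()
    if state["status"] == "READY":
        print(state["response"])
        break
    time.sleep(1)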
src/backend/TTChatBot/chatbot/utils.py
ADDED
@@ -0,0 +1,108 @@
import logging
import os
import re
from datetime import datetime

import tiktoken

from .exceptions import ChatbotVersionException

logger = logging.getLogger(__name__)


def num_tokens_from_messages(
    messages: str,
    model: str = "gpt-3.5-turbo-0613",
) -> int:
    """
    Return the number of tokens used by a message string.
    """
    try:
        if model in ["text-davinci-003", "text-davinci-002"]:
            encoding = tiktoken.get_encoding("p50k_base")
        else:  # gpt-4, "gpt-3.5-turbo"
            encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        logger.warning("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = len(encoding.encode(messages))
    return num_tokens


def get_datetime_from_file(file_path: str) -> str:
    """Get the modification date of a file.

    Args:
        file_path (str): file path

    Raises:
        ChatbotVersionException: exception when the file is not found

    Returns:
        str: version of the chatbot in YYYYMMDD format
    """
    try:
        # Get the modification timestamp of the file
        file_ts = os.path.getmtime(file_path)
        return datetime.fromtimestamp(file_ts).strftime("%Y%m%d")

    except FileNotFoundError as exc:
        logging.error("File %s not found error: ", file_path, exc_info=exc)
        raise ChatbotVersionException("File not found", exc) from exc


def extract_datetime_from_file(
    version_file_path: str,
    weight_file_path: str,
) -> str:
    r"""Extract the date from a line matching
    "- Training data includes information up until (\w{3} \d{2})"

    Args:
        version_file_path (str): file path of the version file
        weight_file_path (str): file path of the embedding file,
            used as a fallback when no date can be read from the version file

    Raises:
        ChatbotVersionException: exception when the file is not found

    Returns:
        str: version of the chatbot in YYYYMMDD format
    """
    current_year = datetime.now().year
    target_line_format = (
        r"- Training data includes information up until (\w{3} \d{2})"
    )
    extracted_date = None

    try:
        # Open the input file for reading
        with open(version_file_path, encoding="utf-8") as infile:
            # Iterate through each line in the input file
            for line in infile:
                # Use regex to search for the date format in the line
                match = re.search(target_line_format, line.strip())
                if match:
                    # If a match is found, extract the date
                    extracted_date = match.group(1)
                    break  # Exit the loop after finding the first date

        # Check if a date was extracted and format it
        if extracted_date:
            return datetime.strptime(
                f"{extracted_date} {current_year}",
                "%b %d %Y",
            ).strftime("%Y%m%d")
        else:
            logging.warning(
                "Date not found in the file, fallback to deployment date",
            )
            return get_datetime_from_file(weight_file_path)

    except FileNotFoundError as exc:
        logging.error(
            "File %s not found error: ",
            version_file_path,
            exc_info=exc,
        )
        raise ChatbotVersionException("File not found", exc) from exc
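A quick illustration of the token counter, assuming only that tiktoken is installed; it is not part of the commit.

# Hypothetical example (not part of the commit) of the token counter above.
from chatbot.utils import num_tokens_from_messages

print(num_tokens_from_messages("How many tokens is this?"))
# Prints the BPE token count under the gpt-3.5-turbo-0613 encoding
# (tokens, not words or characters).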
src/backend/TTChatBot/chatbot/views.py
ADDED
@@ -0,0 +1,199 @@
import json
import logging

from celery.result import AsyncResult
from django.conf import settings
from rest_framework import (
    generics,
    status,
)
from rest_framework.response import Response

from .exceptions import (
    ChatbotVersionException,
    TTChatBotConnectException,
    TTChatBotEngineException,
)
from .serializers import (
    ConversationSerializer,
    MessageSerializer,
    TaskSerializer,
    VersionSerializer,
)
from .tasks import (
    tt_async_chat_klever,
    tt_async_chat_website,
    tt_sync_chat_website,
)
from .utils import (
    extract_datetime_from_file,
    num_tokens_from_messages,
)

# add logger
logger = logging.getLogger(__name__)


class ConversationSyncView(generics.GenericAPIView):
    serializer_class = ConversationSerializer

    def post(self, request, *args, **kwargs):
        data = json.loads(request.body.decode("utf-8"))
        question = data.get("user_chat", None)
        max_token = settings.MAX_TOKEN

        try:
            if (
                num_tokens_from_messages(
                    messages=question,
                    model=settings.TT_MODEL_NAME,
                )
                >= max_token
            ):
                logging.warning(
                    "Maximum token %s reached for user messages: %s",
                    max_token,
                    question,
                )
                res = MessageSerializer(
                    {
                        "user_chat": question,
                        "chatbot_answer": settings.MAX_TOKEN_RESPONSE,
                    },
                )
            else:
                answer = tt_sync_chat_website(question)
                res = MessageSerializer(
                    {
                        "user_chat": question,
                        "chatbot_answer": answer,
                    },
                )

        except (TTChatBotConnectException, TTChatBotEngineException) as exc:
            logger.error("Failed to send request to ChatGPT: %s", exc)
            res = MessageSerializer(
                {
                    "user_chat": question,
                    "chatbot_answer": settings.DEFAULT_RESPONSE,
                },
            )
            return Response(res.data, status=status.HTTP_400_BAD_REQUEST)

        return Response(res.data, status=status.HTTP_200_OK)


class TokyoTechiesConversationAsyncView(generics.GenericAPIView):
    serializer_class = ConversationSerializer

    def post(self, request, *args, **kwargs):
        data = json.loads(request.body.decode("utf-8"))
        question = data.get("user_chat", None)

        try:
            answer = tt_async_chat_website.delay(question)
            res = TaskSerializer(
                {
                    "user_chat": question,
                    "task_id": answer.id,
                },
            )

        except (TTChatBotConnectException, TTChatBotEngineException) as exc:
            logger.error("Failed to send request to ChatGPT: %s", exc)
            res = TaskSerializer(
                {
                    "user_chat": question,
                    "task_id": None,
                },
            )
            return Response(res.data, status=status.HTTP_400_BAD_REQUEST)

        return Response(res.data, status=status.HTTP_200_OK)


class KleverConversationAsyncView(generics.GenericAPIView):
    serializer_class = ConversationSerializer

    def post(self, request, *args, **kwargs):
        data = json.loads(request.body.decode("utf-8"))
        question = data.get("user_chat", None)

        try:
            answer = tt_async_chat_klever.delay(question)
            res = TaskSerializer(
                {
                    "user_chat": question,
                    "task_id": answer.id,
                },
            )

        except (TTChatBotConnectException, TTChatBotEngineException) as exc:
            logger.error("Failed to send request to ChatGPT: %s", exc)
            res = TaskSerializer(
                {
                    "user_chat": question,
                    "task_id": None,
                },
            )
            return Response(res.data, status=status.HTTP_400_BAD_REQUEST)

        return Response(res.data, status=status.HTTP_200_OK)


class ChatTaskStatus(generics.GenericAPIView):
    """
    Check the status of a ChatGPT task
    """

    serializer_class = TaskSerializer

    def get(self, request, task_id, *args, **kwargs):
        task = AsyncResult(task_id)

        if task.ready():
            response = task.result
            logging.info("Task response: %s", response)
            return Response({"status": "READY", "response": response})
        else:
            return Response({"status": "PENDING"})


class TTBotVerion(generics.GenericAPIView):
    """Get version of TTBot"""

    serializer_class = VersionSerializer

    def get(self, *args, **kwargs):
        try:
            ttbot_version = extract_datetime_from_file(
                version_file_path=settings.TT_TRAINING_VERSION,
                weight_file_path=settings.TT_FILEPATH_EMBEDDING,
            )
            res = VersionSerializer({"version": ttbot_version})
            return Response(res.data, status=status.HTTP_200_OK)

        except ChatbotVersionException as exc:
            logger.error("Failed to check version: %s", exc)
            res = VersionSerializer({"version": None})
            return Response(res.data, status=status.HTTP_400_BAD_REQUEST)


class KleverBotVerion(generics.GenericAPIView):
    """Get version of KleverBot"""

    serializer_class = VersionSerializer

    def get(self, *args, **kwargs):
        try:
            kleverbot_version = extract_datetime_from_file(
                version_file_path=settings.KLEVER_TRAINING_VERSION,
                weight_file_path=settings.KLEVER_FILEPATH_EMBEDDING,
            )
            res = VersionSerializer({"version": kleverbot_version})
            return Response(res.data, status=status.HTTP_200_OK)

        except ChatbotVersionException as exc:
            logger.error("Failed to check version: %s", exc)
            res = VersionSerializer({"version": None})
            return Response(res.data, status=status.HTTP_400_BAD_REQUEST)
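For completeness, the synchronous endpoint can be exercised with Django's test client; a sketch, not part of the commit, assuming DJANGO_SETTINGS_MODULE is configured and Django 2.1+ (which JSON-encodes dict payloads when content_type is application/json).

# Hypothetical sketch (not part of the commit) hitting the sync endpoint.
from django.test import Client

client = Client()
resp = client.post(
    "/api/v1/chat/sync",
    data={"user_chat": "Hi"},
    content_type="application/json",
)
print(resp.status_code, resp.json())  # 200 with user_chat and chatbot_answer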
src/backend/TTChatBot/config/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .celery import app as celery_app

__all__ = ["celery_app"]
src/backend/TTChatBot/config/asgi.py
ADDED
@@ -0,0 +1,16 @@
"""
ASGI config for TTChatBot project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")

application = get_asgi_application()
src/backend/TTChatBot/config/celery.py
ADDED
@@ -0,0 +1,24 @@
import os

from celery import Celery
from django.conf import settings

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")

app = Celery("TTChatBot")

app.conf.update(
    broker_connection_retry_on_startup=True,
    broker_connection_max_retries=10,
    result_expires=60,
    task_acks_late=True,
)

app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)

# TODO: Celery conventions:
# https://qiita.com/hankehly/items/c3e0496eb04327a53ac4
# TODO: crontab for Celery:
# https://www.codingforentrepreneurs.com/blog/celery-redis-django/
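Note the namespace="CELERY" argument: Django settings prefixed with CELERY_ are mapped onto the corresponding Celery config keys, so the values in settings/local.py and settings/prod.py below take effect without touching this file. A small check of that mapping, not part of the commit:

# Hypothetical check (not part of the commit): with namespace="CELERY",
# settings.CELERY_BROKER_URL surfaces as app.conf.broker_url, etc.
from config.celery import app

print(app.conf.result_serializer)  # "json", from CELERY_RESULT_SERIALIZER
print(app.conf.task_acks_late)     # True, from app.conf.update(...) above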
src/backend/TTChatBot/config/settings/__init__.py
ADDED
@@ -0,0 +1,19 @@
import os

from dotenv import load_dotenv

load_dotenv()

# OpenAI key
OPENAI_API_KEY = os.getenv("OPEN_AI_KEY")
# export the key so the model libraries can pick it up
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

env_name = os.getenv("ENV_NAME", "prod")

if env_name == "local":
    from .local import *  # noqa
elif env_name == "staging":
    from .staging import *  # noqa
else:
    from .prod import *  # noqa
src/backend/TTChatBot/config/settings/common.py
ADDED
@@ -0,0 +1,132 @@
import os
from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent.parent

# Static files (CSS, JavaScript, Images)
STORAGE_URL = BASE_DIR / "storage"

# Swagger HTTPS
USE_X_FORWARDED_HOST = True
SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")

# Application definition

DJANGO_APPS = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
]

THIRD_PARTY_APPS = [
    "gunicorn",
    "rest_framework",
    "drf_yasg",  # another way to swagger
    "django_celery_results",  # Store Celery Result and cache
]

LOCAL_APPS = [
    "chatbot.apps",
    # 'users.apps.UsersConfig',
    # 'site_settings.apps.SiteSettingsConfig',
    # 'training_model.apps.TrainingModelConfig',
]

INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS

MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
    "whitenoise.middleware.WhiteNoiseMiddleware",
]


SECRET_KEY = os.getenv("DJANGO_SECRET_KEY")
ROOT_URLCONF = "config.urls"
WSGI_APPLICATION = "config.wsgi.application"
ASGI_APPLICATION = "config.asgi.application"

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [BASE_DIR / "templates"],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
        },
    },
]

# Logging
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "default": {
            "format": "%(asctime)s %(levelname)s: %(message)s",
        },
    },
    "filters": {
        "require_debug_false": {
            "()": "django.utils.log.RequireDebugFalse",
        },
        "require_debug_true": {
            "()": "django.utils.log.RequireDebugTrue",
        },
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "default",
            "level": "INFO",
        },
        "common": {
            "class": "logging.FileHandler",
            "filename": STORAGE_URL / "common.log",
            "formatter": "default",
            "level": "INFO",
        },
    },
    "loggers": {
        "": {
            "handlers": ["console", "common"],
            "level": 1,
        },
    },
}

# Internationalization
# https://docs.djangoproject.com/en/4.2/topics/i18n/

LANGUAGE_CODE = "en-us"

TIME_ZONE = "UTC"

USE_I18N = True

USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.2/howto/static-files/

STATIC_URL = "/static/"
STATIC_ROOT = os.path.join(BASE_DIR, "static")
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"

# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
src/backend/TTChatBot/config/settings/local.py
ADDED
@@ -0,0 +1,98 @@
import os

from .common import *  # noqa

ALLOWED_HOSTS = ["*"]

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.getenv("DJANGO_DEBUG")

# Chatbot default answer
DEFAULT_RESPONSE = "Sorry, I'm having trouble understanding you."
DEFAULT_RESPONSE_JP = "申し訳ございません、ご質問を理解いたしかねます"

MAX_TOKEN_RESPONSE = (
    "Sorry, I'm having trouble processing all that information. "
    "Could you summarize a bit more concisely?"
)
MAX_TOKEN_RESPONSE_JP = "申し訳ございません、いただいた全ての情報を処理することができません。もう少し簡潔にしてください。"

# --Postgres--
# DATABASES = {
#     'default': {
#         'ENGINE': 'django.db.backends.postgresql',
#         'NAME': os.getenv('DB_NAME'),
#         'USER': os.getenv('DB_USER'),
#         'PASSWORD': os.getenv('DB_PASSWORD'),
#         'HOST': os.getenv('DB_HOST', 'localhost'),
#         'PORT': os.getenv('DB_PORT'),
#     }
# }

# --Celery--
# List of modules to import when celery starts.
# --Worker settings--
# If you're doing mostly I/O you can have more processes,
# but if mostly spending CPU, try to keep it close to the
# number of CPUs on your machine. If not set, the number of CPUs/cores
# available will be used.
CELERY_WORKER_CONCURRENCY = 1
# CELERYD_LOG_FILE = "celeryd.log"
# CELERYD_LOG_LEVEL = "INFO"
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")
BROKER_URL = os.getenv("BROKER_URL")

CELERY_BROKER_URL = BROKER_URL
CELERY_RESULT_BACKEND = BROKER_URL
CELERY_ACCEPT_CONTENT = ["application/json"]
CELERY_RESULT_SERIALIZER = "json"
CELERY_TASK_SERIALIZER = "json"

# Config for old query methods
# TT Websites Models path
TT_MODELS_PATH = "../../../models/TokyoTechies/"
# text-davinci-003 or gpt-4
TT_MODEL_NAME = "text-davinci-003"
# Klever Models path
KLEVER_MODELS_PATH = "../../../models/Klever/"
# text-davinci-003 or gpt-4
KLEVER_MODEL_NAME = "text-davinci-003"


# Config for new embedding methods
KLEVER_EMBEDDING_MODEL = (
    "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023
)
KLEVER_EMBEDDING_CHAT_MODEL = "gpt-3.5-turbo"  # "gpt-4"
KLEVER_FILEPATH_EMBEDDING = "../../../models/Klever/embeddings/Klever.csv"
KLEVER_TRAINING_VERSION = "../../../models/Klever/_version.txt"

# Config for new embedding methods
TT_EMBEDDING_MODEL = (
    "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023
)
TT_EMBEDDING_CHAT_MODEL = "gpt-3.5-turbo"  # "gpt-4"
TT_FILEPATH_EMBEDDING = (
    "../../../models/TokyoTechies/embeddings/TokyoTechies.csv"
)
TT_TRAINING_VERSION = "../../../models/TokyoTechies/_version.txt"

INTRODUCTION_MESSAGE = (
    "You are a chatbot of {service}. "
    "Use the below articles on the {service} to answer the subsequent question. "  # noqa: E501
    "If the answer cannot be found in the articles, write sorry that I cannot answer your request, please contact our support team for further assistance."  # noqa: E501
    r'If an answer is found, add embedding title in this format "[Title](URL)" to the end of an answer and ignore the same title.'  # noqa: E501
)
SYSTEM_CONTENT = "You answer questions about {service}"
NEXT_ARTICLE = "\n{service} article section:\n--\n{string}\n--"
TOKEN_BUDGET = 4096 - 500

# max token
MAX_TOKEN = 200

# CELERYD_TASK_SOFT_TIME_LIMIT = 3
# Kill anything longer than 10 seconds:
# CELERYD_TASK_TIME_LIMIT = 10
# After 2 hours remove the task result:
# CELERY_TASK_RESULT_EXPIRES = 60 * 60 * 2
src/backend/TTChatBot/config/settings/prod.py
ADDED
@@ -0,0 +1,98 @@
import os

from .common import *  # noqa

ALLOWED_HOSTS = ["*"]

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False

# Chatbot default answer
DEFAULT_RESPONSE = "Sorry, I'm having trouble understanding you."
DEFAULT_RESPONSE_JP = "申し訳ございません、ご質問を理解いたしかねます"

MAX_TOKEN_RESPONSE = (
    "Sorry, I'm having trouble processing all that information. "
    "Could you summarize a bit more concisely?"
)
MAX_TOKEN_RESPONSE_JP = "申し訳ございません、いただいた全ての情報を処理することができません。もう少し簡潔にしてください。"

# --Postgres--
# DATABASES = {
#     'default': {
#         'ENGINE': 'django.db.backends.postgresql',
#         'NAME': os.getenv('DB_NAME'),
#         'USER': os.getenv('DB_USER'),
#         'PASSWORD': os.getenv('DB_PASSWORD'),
#         'HOST': os.getenv('DB_HOST', 'localhost'),
#         'PORT': os.getenv('DB_PORT'),
#     }
# }

# --Celery--
# List of modules to import when celery starts.
# --Worker settings--
# If you're doing mostly I/O you can have more processes,
# but if mostly spending CPU, try to keep it close to the
# number of CPUs on your machine. If not set, the number of CPUs/cores
# available will be used.
CELERY_WORKER_CONCURRENCY = 20
# CELERYD_LOG_FILE = "celeryd.log"
# CELERYD_LOG_LEVEL = "INFO"
REDIS_HOST = os.getenv("REDIS_HOST")
REDIS_PORT = os.getenv("REDIS_PORT")
BROKER_URL = os.getenv("BROKER_URL")

CELERY_BROKER_URL = BROKER_URL
CELERY_RESULT_BACKEND = BROKER_URL
CELERY_ACCEPT_CONTENT = ["application/json"]
CELERY_RESULT_SERIALIZER = "json"
CELERY_TASK_SERIALIZER = "json"

# Config for old query methods
# TT Websites Models path
TT_MODELS_PATH = "../../../models/TokyoTechies/"
# text-davinci-003 or gpt-4
TT_MODEL_NAME = "text-davinci-003"
# Klever Models path
KLEVER_MODELS_PATH = "../../../models/Klever/"
# text-davinci-003 or gpt-4
KLEVER_MODEL_NAME = "text-davinci-003"


# Config for new embedding methods
KLEVER_EMBEDDING_MODEL = (
    "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023
)
KLEVER_EMBEDDING_CHAT_MODEL = "gpt-3.5-turbo"  # "gpt-4"
KLEVER_FILEPATH_EMBEDDING = "../../../models/Klever/embeddings/Klever.csv"
KLEVER_TRAINING_VERSION = "../../../models/Klever/_version.txt"

# Config for new embedding methods
TT_EMBEDDING_MODEL = (
    "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023
)
TT_EMBEDDING_CHAT_MODEL = "gpt-3.5-turbo"  # "gpt-4"
TT_FILEPATH_EMBEDDING = (
    "../../../models/TokyoTechies/embeddings/TokyoTechies.csv"
)
TT_TRAINING_VERSION = "../../../models/TokyoTechies/_version.txt"

INTRODUCTION_MESSAGE = (
    "You are a chatbot of {service}. "
    "Use the below articles on the {service} to answer the subsequent question. "  # noqa: E501
    "If the answer cannot be found in the articles, write sorry that I cannot answer your request, please contact our support team for further assistance."  # noqa: E501
    r'If an answer is found, add embedding title in this format "[Title](URL)" to the end of an answer and ignore the same title.'  # noqa: E501
)
SYSTEM_CONTENT = "You answer questions about {service}"
NEXT_ARTICLE = "\n{service} article section:\n--\n{string}\n--"
TOKEN_BUDGET = 4096 - 500

# max token
MAX_TOKEN = 200

# CELERYD_TASK_SOFT_TIME_LIMIT = 3
# Kill anything longer than 10 seconds:
# CELERYD_TASK_TIME_LIMIT = 10
# After 2 hours remove the task result:
# CELERY_TASK_RESULT_EXPIRES = 60 * 60 * 2
src/backend/TTChatBot/config/settings/staging.py
ADDED
File without changes
src/backend/TTChatBot/config/urls.py
ADDED
@@ -0,0 +1,62 @@
"""
URL configuration for TTChatBot project.

The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/4.2/topics/http/urls/
Examples:
Function views
    1. Add an import: from my_app import views
    2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
    1. Add an import: from other_app.views import Home
    2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.conf import settings
from django.conf.urls.static import static
from django.urls import (
    include,
    path,
)
from drf_yasg import openapi
from drf_yasg.generators import OpenAPISchemaGenerator
from drf_yasg.views import get_schema_view
from rest_framework import permissions


class BothHttpAndHttpsSchemaGenerator(OpenAPISchemaGenerator):
    def get_schema(self, request=None, public=False):
        schema = super().get_schema(request, public)
        schema.schemes = ["http", "https"]
        return schema


schema_view = get_schema_view(
    openapi.Info(
        title="Tokyo Techies Chatbot",
        default_version="v1",
        description="API documentation for Tokyo Techies Chatbot API",
    ),
    public=True,
    generator_class=BothHttpAndHttpsSchemaGenerator,
    permission_classes=[permissions.AllowAny],
)

urlpatterns = [
    path("api/v1/", include("chatbot.urls")),
    # Swagger URLs
    path(
        "",
        schema_view.with_ui("swagger", cache_timeout=0),
        name="schema-swagger-ui",
    ),
]

# Serve static files only during development
if settings.DEBUG:
    urlpatterns += static(
        settings.STATIC_URL,
        document_root=settings.STATIC_ROOT,
    )
src/backend/TTChatBot/config/wsgi.py
ADDED
@@ -0,0 +1,16 @@
"""
WSGI config for TTChatBot project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")

application = get_wsgi_application()
src/backend/TTChatBot/manage.py
ADDED
@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?",
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()
src/backend/TTChatBot/storage/.gitkeep
ADDED
File without changes
src/frontend/.gitkeep
ADDED
File without changes
src/frontend/.prettierignore
ADDED
@@ -0,0 +1 @@
public/**/*
src/frontend/.prettierrc
ADDED
@@ -0,0 +1,4 @@
{
  "semi": false,
  "singleQuote": true
}
src/frontend/.sample-env
ADDED
@@ -0,0 +1 @@
BACKEND_API_URL=http://localhost:8000/
src/frontend/Dockerfile
ADDED
@@ -0,0 +1,22 @@
ARG NODE_VERSION=18.16.0
ARG ALPINE_VERSION=3.17.2

FROM node:${NODE_VERSION}-alpine AS node
FROM alpine:${ALPINE_VERSION}

COPY --from=node /usr/lib /usr/lib
COPY --from=node /usr/local/lib /usr/local/lib
COPY --from=node /usr/local/include /usr/local/include
COPY --from=node /usr/local/bin /usr/local/bin

# create the destination directory
RUN mkdir -p /src/frontend
WORKDIR /src/frontend

# copy the app
COPY . /src/frontend
RUN npm install

EXPOSE 3000

CMD [ "npm", "start" ]
src/frontend/environments/dev/build.args
ADDED
@@ -0,0 +1 @@
BACKEND_API_URL=https://www.chatbot-api.dev.aws.tokyotechies.co.jp/
src/frontend/environments/prod/build.args
ADDED
@@ -0,0 +1 @@
BACKEND_API_URL=https://www.chatbot-api.tokyotechies.com/
src/frontend/next-env.d.ts
ADDED
@@ -0,0 +1,5 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />

// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
src/frontend/next.config.js
ADDED
@@ -0,0 +1,11 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,
  i18n: {
    locales: ['en', 'ja'],
    defaultLocale: 'en',
    localeDetection: false,
  },
}

module.exports = nextConfig
src/frontend/package-lock.json
ADDED
The diff for this file is too large to render.
src/frontend/package.json
ADDED
@@ -0,0 +1,30 @@
{
  "name": "tokyo-techies-chatbot",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "lint": "next lint"
  },
  "dependencies": {
    "@types/node": "20.5.3",
    "@types/react": "18.2.21",
    "@types/react-dom": "18.2.7",
    "autoprefixer": "10.4.15",
    "axios": "^0.27.2",
    "eslint": "8.47.0",
    "eslint-config-next": "13.4.19",
    "eslint-config-prettier": "^8.5.0",
    "eslint-plugin-prettier": "^4.0.0",
    "next": "13.4.19",
    "postcss": "8.4.28",
    "prettier": "^2.7.0",
    "react": "18.2.0",
    "react-dom": "18.2.0",
    "react-markdown": "^8.0.4",
    "tailwindcss": "3.3.3",
    "typescript": "5.1.6"
  }
}
src/frontend/postcss.config.js
ADDED
@@ -0,0 +1,6 @@
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}
src/frontend/public/favicon.webp
ADDED
src/frontend/public/locales/en.ts
ADDED
@@ -0,0 +1,15 @@
export default {
  name: 'Techie ',
  askMe: 'Ask me anything',
  connected: 'You are connected with a virtual assistant',
  greeting: 'Hi there! 😊 \n' +
    'I\'m Techie - a virtual assistant here to help you with anything related to Tokyo Techies.\n' +
    'If you have any questions, need information, or just want to chat, feel free to ask me!\n' +
    'How can I help you?',
  placeholder: 'Type your question...',
  maintenance: 'Sorry, we are under maintenance!',
  year: '/',
  month: '/',
  day: '',
  edition: ' version'
}