mickkhaw commited on
Commit
7a6fa31
·
0 Parent(s):

Initial commit

Browse files
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third parties caching (e.g LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22
+ unsafe_allow_html = false
23
+
24
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
25
+ latex = false
26
+
27
+ # Authorize users to upload files with messages
28
+ multi_modal = true
29
+
30
+ # Allows user to use speech to text
31
+ [features.speech_to_text]
32
+ enabled = false
33
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34
+ # language = "en-US"
35
+
36
+ [UI]
37
+ # Name of the app and chatbot.
38
+ name = "RAGalicious"
39
+
40
+ # Show the readme while the thread is empty.
41
+ show_readme_as_default = true
42
+
43
+ # Description of the app and chatbot. This is used for HTML tags.
44
+ # description = ""
45
+
46
+ # Large size content are by default collapsed for a cleaner ui
47
+ default_collapse_content = true
48
+
49
+ # The default value for the expand messages settings.
50
+ default_expand_messages = false
51
+
52
+ # Hide the chain of thought details from the user in the UI.
53
+ hide_cot = false
54
+
55
+ # Link to your github repo. This will add a github button in the UI's header.
56
+ # github = "https://github.com/mickkhaw1981/ragalicious"
57
+
58
+ # Specify a CSS file that can be used to customize the user interface.
59
+ # The CSS file can be served from the public directory or via an external link.
60
+ # custom_css = "/public/test.css"
61
+
62
+ # Override default MUI light theme. (Check theme.ts)
63
+ [UI.theme.light]
64
+ #background = "#FAFAFA"
65
+ #paper = "#FFFFFF"
66
+
67
+ [UI.theme.light.primary]
68
+ #main = "#F80061"
69
+ #dark = "#980039"
70
+ #light = "#FFE7EB"
71
+
72
+ # Override default MUI dark theme. (Check theme.ts)
73
+ [UI.theme.dark]
74
+ #background = "#FAFAFA"
75
+ #paper = "#FFFFFF"
76
+
77
+ [UI.theme.dark.primary]
78
+ #main = "#F80061"
79
+ #dark = "#980039"
80
+ #light = "#FFE7EB"
81
+
82
+
83
+ [meta]
84
+ generated_by = "1.0.0"
.chainlit/translations/en-US.json ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "components": {
3
+ "atoms": {
4
+ "buttons": {
5
+ "userButton": {
6
+ "menu": {
7
+ "settings": "Settings",
8
+ "settingsKey": "S",
9
+ "APIKeys": "API Keys",
10
+ "logout": "Logout"
11
+ }
12
+ }
13
+ }
14
+ },
15
+ "molecules": {
16
+ "newChatButton": {
17
+ "newChat": "New Chat"
18
+ },
19
+ "tasklist": {
20
+ "TaskList": {
21
+ "title": "\ud83d\uddd2\ufe0f Task List",
22
+ "loading": "Loading...",
23
+ "error": "An error occured"
24
+ }
25
+ },
26
+ "attachments": {
27
+ "cancelUpload": "Cancel upload",
28
+ "removeAttachment": "Remove attachment"
29
+ },
30
+ "newChatDialog": {
31
+ "createNewChat": "Create new chat?",
32
+ "clearChat": "This will clear the current messages and start a new chat.",
33
+ "cancel": "Cancel",
34
+ "confirm": "Confirm"
35
+ },
36
+ "settingsModal": {
37
+ "settings": "Settings",
38
+ "expandMessages": "Expand Messages",
39
+ "hideChainOfThought": "Hide Chain of Thought",
40
+ "darkMode": "Dark Mode"
41
+ },
42
+ "detailsButton": {
43
+ "using": "Using",
44
+ "running": "Running",
45
+ "took_one": "Took {{count}} step",
46
+ "took_other": "Took {{count}} steps"
47
+ },
48
+ "auth": {
49
+ "authLogin": {
50
+ "title": "Login to access the app.",
51
+ "form": {
52
+ "email": "Email address",
53
+ "password": "Password",
54
+ "noAccount": "Don't have an account?",
55
+ "alreadyHaveAccount": "Already have an account?",
56
+ "signup": "Sign Up",
57
+ "signin": "Sign In",
58
+ "or": "OR",
59
+ "continue": "Continue",
60
+ "forgotPassword": "Forgot password?",
61
+ "passwordMustContain": "Your password must contain:",
62
+ "emailRequired": "email is a required field",
63
+ "passwordRequired": "password is a required field"
64
+ },
65
+ "error": {
66
+ "default": "Unable to sign in.",
67
+ "signin": "Try signing in with a different account.",
68
+ "oauthsignin": "Try signing in with a different account.",
69
+ "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
70
+ "oauthcallbackerror": "Try signing in with a different account.",
71
+ "oauthcreateaccount": "Try signing in with a different account.",
72
+ "emailcreateaccount": "Try signing in with a different account.",
73
+ "callback": "Try signing in with a different account.",
74
+ "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
75
+ "emailsignin": "The e-mail could not be sent.",
76
+ "emailverify": "Please verify your email, a new email has been sent.",
77
+ "credentialssignin": "Sign in failed. Check the details you provided are correct.",
78
+ "sessionrequired": "Please sign in to access this page."
79
+ }
80
+ },
81
+ "authVerifyEmail": {
82
+ "almostThere": "You're almost there! We've sent an email to ",
83
+ "verifyEmailLink": "Please click on the link in that email to complete your signup.",
84
+ "didNotReceive": "Can't find the email?",
85
+ "resendEmail": "Resend email",
86
+ "goBack": "Go Back",
87
+ "emailSent": "Email sent successfully.",
88
+ "verifyEmail": "Verify your email address"
89
+ },
90
+ "providerButton": {
91
+ "continue": "Continue with {{provider}}",
92
+ "signup": "Sign up with {{provider}}"
93
+ },
94
+ "authResetPassword": {
95
+ "newPasswordRequired": "New password is a required field",
96
+ "passwordsMustMatch": "Passwords must match",
97
+ "confirmPasswordRequired": "Confirm password is a required field",
98
+ "newPassword": "New password",
99
+ "confirmPassword": "Confirm password",
100
+ "resetPassword": "Reset Password"
101
+ },
102
+ "authForgotPassword": {
103
+ "email": "Email address",
104
+ "emailRequired": "email is a required field",
105
+ "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
106
+ "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
107
+ "resendEmail": "Resend email",
108
+ "continue": "Continue",
109
+ "goBack": "Go Back"
110
+ }
111
+ }
112
+ },
113
+ "organisms": {
114
+ "chat": {
115
+ "history": {
116
+ "index": {
117
+ "showHistory": "Show history",
118
+ "lastInputs": "Last Inputs",
119
+ "noInputs": "Such empty...",
120
+ "loading": "Loading..."
121
+ }
122
+ },
123
+ "inputBox": {
124
+ "input": {
125
+ "placeholder": "Type your message here..."
126
+ },
127
+ "speechButton": {
128
+ "start": "Start recording",
129
+ "stop": "Stop recording"
130
+ },
131
+ "SubmitButton": {
132
+ "sendMessage": "Send message",
133
+ "stopTask": "Stop Task"
134
+ },
135
+ "UploadButton": {
136
+ "attachFiles": "Attach files"
137
+ },
138
+ "waterMark": {
139
+ "text": "Built with"
140
+ }
141
+ },
142
+ "Messages": {
143
+ "index": {
144
+ "running": "Running",
145
+ "executedSuccessfully": "executed successfully",
146
+ "failed": "failed",
147
+ "feedbackUpdated": "Feedback updated",
148
+ "updating": "Updating"
149
+ }
150
+ },
151
+ "dropScreen": {
152
+ "dropYourFilesHere": "Drop your files here"
153
+ },
154
+ "index": {
155
+ "failedToUpload": "Failed to upload",
156
+ "cancelledUploadOf": "Cancelled upload of",
157
+ "couldNotReachServer": "Could not reach the server",
158
+ "continuingChat": "Continuing previous chat"
159
+ },
160
+ "settings": {
161
+ "settingsPanel": "Settings panel",
162
+ "reset": "Reset",
163
+ "cancel": "Cancel",
164
+ "confirm": "Confirm"
165
+ }
166
+ },
167
+ "threadHistory": {
168
+ "sidebar": {
169
+ "filters": {
170
+ "FeedbackSelect": {
171
+ "feedbackAll": "Feedback: All",
172
+ "feedbackPositive": "Feedback: Positive",
173
+ "feedbackNegative": "Feedback: Negative"
174
+ },
175
+ "SearchBar": {
176
+ "search": "Search"
177
+ }
178
+ },
179
+ "DeleteThreadButton": {
180
+ "confirmMessage": "This will delete the thread as well as it's messages and elements.",
181
+ "cancel": "Cancel",
182
+ "confirm": "Confirm",
183
+ "deletingChat": "Deleting chat",
184
+ "chatDeleted": "Chat deleted"
185
+ },
186
+ "index": {
187
+ "pastChats": "Past Chats"
188
+ },
189
+ "ThreadList": {
190
+ "empty": "Empty...",
191
+ "today": "Today",
192
+ "yesterday": "Yesterday",
193
+ "previous7days": "Previous 7 days",
194
+ "previous30days": "Previous 30 days"
195
+ },
196
+ "TriggerButton": {
197
+ "closeSidebar": "Close sidebar",
198
+ "openSidebar": "Open sidebar"
199
+ }
200
+ },
201
+ "Thread": {
202
+ "backToChat": "Go back to chat",
203
+ "chatCreatedOn": "This chat was created on"
204
+ }
205
+ },
206
+ "header": {
207
+ "chat": "Chat",
208
+ "readme": "Readme"
209
+ }
210
+ }
211
+ },
212
+ "hooks": {
213
+ "useLLMProviders": {
214
+ "failedToFetchProviders": "Failed to fetch providers:"
215
+ }
216
+ },
217
+ "pages": {
218
+ "Design": {},
219
+ "Env": {
220
+ "savedSuccessfully": "Saved successfully",
221
+ "requiredApiKeys": "Required API Keys",
222
+ "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
223
+ },
224
+ "Page": {
225
+ "notPartOfProject": "You are not part of this project."
226
+ },
227
+ "ResumeButton": {
228
+ "resumeChat": "Resume Chat"
229
+ }
230
+ }
231
+ }
.env.sample ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ OPENAI_API_KEY=
2
+ MYSCALE_HOST=
3
+ MYSCALE_USERNAME=
4
+ MYSCALE_PASSWORD=
5
+ TWILIO_ACCOUNT_SID=
6
+ TWILIO_AUTH_TOKEN=
7
+ SUPABASE_SECRET_KEY=
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.faiss filter=lfs diff=lfs merge=lfs -text
37
+ *.weba filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Audio files
10
+ *.weba
11
+ .files/**/*.weba
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+ cover/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ .pybuilder/
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ # For a library or package, you might want to ignore these files since the code is
91
+ # intended to run in multiple environments; otherwise, check them in:
92
+ # .python-version
93
+
94
+ # pipenv
95
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
97
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
98
+ # install all needed dependencies.
99
+ #Pipfile.lock
100
+
101
+ # poetry
102
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
104
+ # commonly ignored for libraries.
105
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106
+ #poetry.lock
107
+
108
+ # pdm
109
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110
+ #pdm.lock
111
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112
+ # in version control.
113
+ # https://pdm.fming.dev/#use-with-ide
114
+ .pdm.toml
115
+
116
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117
+ __pypackages__/
118
+
119
+ # Celery stuff
120
+ celerybeat-schedule
121
+ celerybeat.pid
122
+
123
+ # SageMath parsed files
124
+ *.sage.py
125
+
126
+ # Environments
127
+ .env
128
+ .venv
129
+ env/
130
+ venv/
131
+ ENV/
132
+ env.bak/
133
+ venv.bak/
134
+
135
+ # Spyder project settings
136
+ .spyderproject
137
+ .spyproject
138
+
139
+ # Rope project settings
140
+ .ropeproject
141
+
142
+ # mkdocs documentation
143
+ /site
144
+
145
+ # mypy
146
+ .mypy_cache/
147
+ .dmypy.json
148
+ dmypy.json
149
+
150
+ # Pyre type checker
151
+ .pyre/
152
+
153
+ # pytype static type analyzer
154
+ .pytype/
155
+
156
+ # Cython debug symbols
157
+ cython_debug/
158
+
159
+ # PyCharm
160
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
163
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164
+ #.idea/
165
+
166
+ # VS CODE
167
+ .vscode/
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10.12

# Run as a non-root user (required for Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Install dependencies before copying the source so this layer is cached
# when only application code changes. The original copied requirements.txt
# to "~/app/..." — Docker COPY does not expand "~", which silently created
# a literal "~" directory; use $HOME/$WORKDIR paths instead.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source, owned by the non-root user. (The original
# had two overlapping copies: "COPY --chown=user . $HOME/app" followed by a
# root-owned "COPY . ." that clobbered the ownership of every file.)
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 mickkhaw1981
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RAGalicious App (AIE3)
3
+ emoji: 👩‍🍳
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # 😋 Welcome to RAGalicious!
12
+
13
+ RAGalicious is a chatbot designed to help users discover delectable recipes from the NYTimes Cooking section. It simplifies the recipe discovery process, providing users inspiration based on ingredients, diet, occasion or equipment they have.
14
+
15
+ ## Example prompts
16
+
17
+ - **Plan your daily meals:** E.g. "Give me ideas for making an easy weeknight dinner."
18
+ - **Get ready to host occasions:** E.g. "What are good dishes to make for Rosh Hashanah?"
19
+ - **Get scrappy with ingredients you already have:** E.g. "What can I make with pasta, lemon and chickpeas?"
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ import os
3
+ from pprint import pprint
4
+ import uuid
5
+ import chainlit as cl
6
+ from chainlit.element import ElementBased
7
+ from dotenv import load_dotenv
8
+
9
+ # modules for audio processing
10
+ import httpx
11
+ from langchain.schema.runnable.config import RunnableConfig
12
+ from langchain_openai.chat_models import ChatOpenAI
13
+ from openai import AsyncOpenAI
14
+
15
+ from utils.graph import generate_workflow
16
+
17
# ---- ENV VARIABLES ---- #
# Load .env BEFORE constructing any API client: AsyncOpenAI() reads
# OPENAI_API_KEY from the process environment at construction time, so
# creating the client first fails (or uses a stale key) whenever the key
# only lives in the .env file.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
#QDRANT_CLOUD_KEY = os.environ.get("QDRANT_CLOUD_KEY")
#QDRANT_CLOUD_URL = "https://30591e3d-7092-41c4-95e1-4d3c7ef6e894.us-east4-0.gcp.cloud.qdrant.io"
#ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
#ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID")

# Async OpenAI client, used for Whisper speech-to-text transcription.
client = AsyncOpenAI()


# -- AUGMENTED -- #

# Define the LLMs: a cheap deterministic model for routine graph nodes and
# a more capable model for harder steps.
base_llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=OPENAI_API_KEY, tags=["base_llm"], temperature=0)
# Tag fixed from the copy-pasted "base_llm" to "power_llm" so traces can
# distinguish which model produced a run.
power_llm = ChatOpenAI(model="gpt-4o", openai_api_key=OPENAI_API_KEY, tags=["power_llm"])
33
+
34
+
35
# Conversation starters for the 1st screen
@cl.set_starters
async def set_starters():
    """Return the clickable conversation starters shown on the empty-chat screen."""
    starter_specs = [
        (
            "Plan your quick daily meals",
            "Give me ideas for making an easy weeknight dinner that takes less than 25 minutes to prepare",
            "/public/meals4.svg",
        ),
        (
            "Ideas for special occasions",
            "What are good Middle Eastern dishes to make for Thanksgiving?",
            "/public/occasion4.svg",
        ),
        (
            "Use ingredients you have",
            "Suggest Spanish recipes that are good for the summer that makes use of tomatoes",
            "/public/ingredients4.svg",
        ),
    ]
    return [cl.Starter(label=label, message=message, icon=icon) for label, message, icon in starter_specs]
55
+
56
+
57
# Chat Start Function: Initialize a RAG (Retrieval-Augmented Generation) chain at the start of each chat session.
@cl.on_chat_start
async def start_chat():
    """Initialize per-session state when a user session begins.

    Builds the LangGraph workflow once per session and stores it — together
    with a fresh thread id — in the user session, a server-side dictionary
    unique to each connected user.
    """
    session = cl.user_session
    session.set("langgraph_chain", generate_workflow(base_llm, power_llm))
    session.set("thread_id", str(uuid.uuid4()))
70
+
71
+
72
# Message Handling Function: Process and respond to user messages using the RAG chain.
@cl.on_message
async def main(message: cl.Message):
    """
    This function will be called every time a message is received from a session.
    We will use the LCEL RAG chain to generate a response to the user question.
    The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
    """

    langgraph_chain = cl.user_session.get("langgraph_chain")
    thread_id = cl.user_session.get("thread_id")
    msg = cl.Message(content="")
    # cl_msg lets graph nodes stream tokens straight into this Chainlit message.
    langgraph_config = {"configurable": {"thread_id": thread_id, "cl_msg": msg}}

    async for output in langgraph_chain.astream({"question": message.content}, langgraph_config):
        # Only the node names are needed for this debug trace; node values are
        # delivered to the user via cl_msg, so iterate keys directly instead of
        # .items() with an unused value.
        for key in output:
            pprint(f"================== Node: '{key}':")

    await msg.send()
91
+
92
+
93
# Speech-to-Text Function: Convert audio file to text
@cl.step(type="tool")
async def speech_to_text(audio_file):
    # Transcribe `audio_file` with OpenAI Whisper and return the plain text.
    # Callers in this file pass a (filename, bytes, mime_type) tuple — the
    # file-upload shape the OpenAI SDK accepts; verify against the SDK docs
    # if the input source changes.
    response = await client.audio.transcriptions.create(model="whisper-1", file=audio_file)
    return response.text
98
+
99
+
100
# Generate Text Answer Function: Take the output of Speech-to-Text and generate a text answer
@cl.step(type="tool")
async def generate_text_answer(transcription):
    """Stream a RAG answer to *transcription* and return the accumulated text.

    NOTE(review): nothing in this file ever stores "base_rag_chain" in the
    session (start_chat stores "langgraph_chain"), so this lookup returns
    None and .astream() would raise. All callers are currently commented
    out — confirm the intended chain before re-enabling.
    """
    base_rag_chain = cl.user_session.get("base_rag_chain")
    msg = cl.Message(content="")
    async for chunk in base_rag_chain.astream(
        {"question": transcription},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        # Only stream dict chunks carrying a string "response"; other chunk
        # shapes emitted by the chain are ignored.
        if isinstance(chunk, dict) and "response" in chunk and isinstance(chunk["response"], str):
            await msg.stream_token(chunk["response"])

    return msg.content
113
+
114
+
115
+ # Text-to-Speech Function: Take the text answer generated and convert it to an audio file
116
+ # @cl.step(type="tool")
117
+ # async def text_to_speech(text: str, mime_type: str):
118
+ # CHUNK_SIZE = 2048 # try 4096 or 8192 if getting read timeout error. the bigger the chunk size, the fewer API calls but longer wait time
119
+ # url = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
120
+ # headers = {"Accept": mime_type, "Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
121
+ # data = {
122
+ # "text": text,
123
+ # "model_id": "eleven_monolingual_v1",
124
+ # "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
125
+ # }
126
+
127
+ # # make an async HTTP POST request to the ElevenLabs API to convert text to speech and return an audio file
128
+ # async with httpx.AsyncClient(timeout=60.0) as client:
129
+ # response = await client.post(url, json=data, headers=headers)
130
+ # response.raise_for_status() # Ensure we notice bad responses
131
+ # buffer = BytesIO()
132
+ # buffer.name = f"output_audio.{mime_type.split('/')[1]}"
133
+ # async for chunk in response.aiter_bytes(chunk_size=CHUNK_SIZE):
134
+ # if chunk:
135
+ # buffer.write(chunk)
136
+
137
+ # buffer.seek(0)
138
+ # return buffer.name, buffer.read()
139
+
140
+
141
+ # ---- AUDIO PROCESSING ---- #
142
+
143
+
144
# Audio Chunk Function: Process audio chunks as they arrive from the user's microphone
@cl.on_audio_chunk
async def on_audio_chunk(chunk: cl.AudioChunk):
    """Accumulate streamed microphone chunks into a per-session BytesIO buffer."""
    session = cl.user_session
    if chunk.isStart:
        # First chunk of a new stream: create the buffer and give it a file
        # name with the right extension — Whisper uses it to detect the type.
        stream_buffer = BytesIO()
        stream_buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}"
        session.set("audio_buffer", stream_buffer)
        session.set("audio_mime_type", chunk.mimeType)
    # Append every chunk; the complete recording is transcribed in on_audio_end.
    session.get("audio_buffer").write(chunk.data)
157
+
158
+
159
+ # Audio End Function: Process the audio file and generate a response
160
+ @cl.on_audio_end
161
+ async def on_audio_end(elements: list[ElementBased]):
162
+ # Get the audio buffer from the session
163
+ audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
164
+ audio_buffer.seek(0) # Move the file pointer to the beginning
165
+ audio_file = audio_buffer.read()
166
+ audio_mime_type: str = cl.user_session.get("audio_mime_type")
167
+ input_audio_el = cl.Audio(mime=audio_mime_type, content=audio_file, name=audio_buffer.name)
168
+ await cl.Message(author="You", type="user_message", content="", elements=[input_audio_el, *elements]).send()
169
+
170
+ whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
171
+
172
+ transcription = await speech_to_text(whisper_input)
173
+ print("Transcription: ", transcription)
174
+
175
+ langgraph_chain = cl.user_session.get("langgraph_chain")
176
+ thread_id = cl.user_session.get("thread_id")
177
+ msg = cl.Message(content="")
178
+ langgraph_config = {"configurable": {"thread_id": thread_id, "cl_msg": msg}}
179
+
180
+ async for output in langgraph_chain.astream({"question": transcription}, langgraph_config):
181
+ for key, value in output.items():
182
+ pprint(f"================== Node: '{key}':")
183
+
184
+ await msg.send()
185
+
186
+ # text_answer = await generate_text_answer(
187
+ # transcription
188
+ # ) # need to change this to generate answer based on base_rag_chain
189
+
190
+ # output_name, output_audio = await text_to_speech(text_answer, audio_mime_type)
191
+
192
+ # output_audio_el = cl.Audio(
193
+ # name=output_name,
194
+ # auto_play=True,
195
+ # mime=audio_mime_type,
196
+ # content=output_audio,
197
+ # )
198
+ # answer_message = await cl.Message(content="").send()
199
+ # answer_message.elements = [output_audio_el]
200
+ # await answer_message.update()
chainlit.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # 😋 Welcome to RAGalicious!
2
+
3
+ RAGalicious is a chatbot designed to help users discover delectable recipes from the NYTimes Cooking section. It simplifies the recipe discovery process, providing users inspiration based on ingredients, diet, occasion or equipment they have.
4
+
5
+ ## Example prompts
6
+
7
+ - **Plan your daily meals:** E.g. "Give me ideas for making an easy weeknight dinner."
8
+ - **Get ready to host occasions:** E.g. "What are good dishes to make for Rosh Hashanah?"
9
+ - **Get scrappy with ingredients you already have:** E.g. "What can I make with pasta, lemon and chickpeas?"
public/avatars/ragalicious.png ADDED
public/favicon.svg ADDED
public/ingredients4.svg ADDED
public/logo_dark.svg ADDED
public/logo_light.svg ADDED
public/meals4.svg ADDED
public/occasion4.svg ADDED
ragalicious.code-workspace ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": "."
5
+ }
6
+ ]
7
+ }
requirements.in ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ chainlit==1.1.306
2
+ langchain==0.2.7
3
+ langchain_core==0.2.15
4
+ langchain_openai==0.1.15
5
+ langchain-qdrant==0.1.1
6
+ langchain_community==0.2.5
7
+ langchain_huggingface==0.0.3
8
+ langchain_text_splitters==0.2.1
9
+ qdrant-client==1.10.1
10
+ openai==1.35.13
11
+ python-dotenv==1.0.1
12
+ lark
13
+ clickhouse-connect
14
+ langgraph
15
+ aiosqlite
16
+ supabase
17
+ twilio
requirements.txt ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.12
3
+ # by the following command:
4
+ #
5
+ # pip-compile requirements.in
6
+ #
7
+ aiofiles==23.2.1
8
+ # via chainlit
9
+ aiohttp==3.9.5
10
+ # via
11
+ # aiohttp-retry
12
+ # langchain
13
+ # langchain-community
14
+ # twilio
15
+ aiohttp-retry==2.8.3
16
+ # via twilio
17
+ aiosignal==1.3.1
18
+ # via aiohttp
19
+ aiosqlite==0.20.0
20
+ # via -r requirements.in
21
+ annotated-types==0.7.0
22
+ # via pydantic
23
+ anyio==3.7.1
24
+ # via
25
+ # asyncer
26
+ # httpx
27
+ # openai
28
+ # starlette
29
+ # watchfiles
30
+ asyncer==0.0.2
31
+ # via chainlit
32
+ attrs==23.2.0
33
+ # via aiohttp
34
+ bidict==0.23.1
35
+ # via python-socketio
36
+ certifi==2024.7.4
37
+ # via
38
+ # clickhouse-connect
39
+ # httpcore
40
+ # httpx
41
+ # requests
42
+ chainlit==1.1.306
43
+ # via -r requirements.in
44
+ charset-normalizer==3.3.2
45
+ # via requests
46
+ chevron==0.14.0
47
+ # via literalai
48
+ click==8.1.7
49
+ # via
50
+ # chainlit
51
+ # uvicorn
52
+ clickhouse-connect==0.7.16
53
+ # via -r requirements.in
54
+ dataclasses-json==0.5.14
55
+ # via
56
+ # chainlit
57
+ # langchain-community
58
+ deprecated==1.2.14
59
+ # via
60
+ # opentelemetry-api
61
+ # opentelemetry-exporter-otlp-proto-grpc
62
+ # opentelemetry-exporter-otlp-proto-http
63
+ deprecation==2.1.0
64
+ # via postgrest
65
+ distro==1.9.0
66
+ # via openai
67
+ fastapi==0.110.3
68
+ # via chainlit
69
+ filelock==3.15.4
70
+ # via
71
+ # huggingface-hub
72
+ # torch
73
+ # transformers
74
+ filetype==1.2.0
75
+ # via chainlit
76
+ frozenlist==1.4.1
77
+ # via
78
+ # aiohttp
79
+ # aiosignal
80
+ fsspec==2024.6.1
81
+ # via
82
+ # huggingface-hub
83
+ # torch
84
+ googleapis-common-protos==1.63.2
85
+ # via
86
+ # opentelemetry-exporter-otlp-proto-grpc
87
+ # opentelemetry-exporter-otlp-proto-http
88
+ gotrue==2.6.0
89
+ # via supabase
90
+ greenlet==3.0.3
91
+ # via sqlalchemy
92
+ grpcio==1.65.1
93
+ # via
94
+ # grpcio-tools
95
+ # opentelemetry-exporter-otlp-proto-grpc
96
+ # qdrant-client
97
+ grpcio-tools==1.62.2
98
+ # via qdrant-client
99
+ h11==0.14.0
100
+ # via
101
+ # httpcore
102
+ # uvicorn
103
+ # wsproto
104
+ h2==4.1.0
105
+ # via httpx
106
+ hpack==4.0.0
107
+ # via h2
108
+ httpcore==1.0.5
109
+ # via httpx
110
+ httpx[http2]==0.27.0
111
+ # via
112
+ # chainlit
113
+ # gotrue
114
+ # literalai
115
+ # openai
116
+ # postgrest
117
+ # qdrant-client
118
+ # storage3
119
+ # supabase
120
+ # supafunc
121
+ huggingface-hub==0.24.0
122
+ # via
123
+ # langchain-huggingface
124
+ # sentence-transformers
125
+ # tokenizers
126
+ # transformers
127
+ hyperframe==6.0.1
128
+ # via h2
129
+ idna==3.7
130
+ # via
131
+ # anyio
132
+ # httpx
133
+ # requests
134
+ # yarl
135
+ importlib-metadata==7.1.0
136
+ # via opentelemetry-api
137
+ jinja2==3.1.4
138
+ # via torch
139
+ joblib==1.4.2
140
+ # via scikit-learn
141
+ jsonpatch==1.33
142
+ # via langchain-core
143
+ jsonpointer==3.0.0
144
+ # via jsonpatch
145
+ langchain==0.2.7
146
+ # via
147
+ # -r requirements.in
148
+ # langchain-community
149
+ langchain-community==0.2.5
150
+ # via -r requirements.in
151
+ langchain-core==0.2.15
152
+ # via
153
+ # -r requirements.in
154
+ # langchain
155
+ # langchain-community
156
+ # langchain-huggingface
157
+ # langchain-openai
158
+ # langchain-qdrant
159
+ # langchain-text-splitters
160
+ # langgraph
161
+ langchain-huggingface==0.0.3
162
+ # via -r requirements.in
163
+ langchain-openai==0.1.15
164
+ # via -r requirements.in
165
+ langchain-qdrant==0.1.1
166
+ # via -r requirements.in
167
+ langchain-text-splitters==0.2.1
168
+ # via
169
+ # -r requirements.in
170
+ # langchain
171
+ langgraph==0.1.8
172
+ # via -r requirements.in
173
+ langsmith==0.1.93
174
+ # via
175
+ # langchain
176
+ # langchain-community
177
+ # langchain-core
178
+ lark==1.1.9
179
+ # via -r requirements.in
180
+ lazify==0.4.0
181
+ # via chainlit
182
+ literalai==0.0.607
183
+ # via chainlit
184
+ lz4==4.3.3
185
+ # via clickhouse-connect
186
+ markupsafe==2.1.5
187
+ # via jinja2
188
+ marshmallow==3.21.3
189
+ # via dataclasses-json
190
+ mpmath==1.3.0
191
+ # via sympy
192
+ multidict==6.0.5
193
+ # via
194
+ # aiohttp
195
+ # yarl
196
+ mypy-extensions==1.0.0
197
+ # via typing-inspect
198
+ nest-asyncio==1.6.0
199
+ # via chainlit
200
+ networkx==3.2.1
201
+ # via torch
202
+ numpy==1.26.4
203
+ # via
204
+ # chainlit
205
+ # langchain
206
+ # langchain-community
207
+ # qdrant-client
208
+ # scikit-learn
209
+ # scipy
210
+ # sentence-transformers
211
+ # transformers
212
+ openai==1.35.13
213
+ # via
214
+ # -r requirements.in
215
+ # langchain-openai
216
+ opentelemetry-api==1.25.0
217
+ # via
218
+ # opentelemetry-exporter-otlp-proto-grpc
219
+ # opentelemetry-exporter-otlp-proto-http
220
+ # opentelemetry-instrumentation
221
+ # opentelemetry-sdk
222
+ # opentelemetry-semantic-conventions
223
+ # uptrace
224
+ opentelemetry-exporter-otlp==1.25.0
225
+ # via uptrace
226
+ opentelemetry-exporter-otlp-proto-common==1.25.0
227
+ # via
228
+ # opentelemetry-exporter-otlp-proto-grpc
229
+ # opentelemetry-exporter-otlp-proto-http
230
+ opentelemetry-exporter-otlp-proto-grpc==1.25.0
231
+ # via opentelemetry-exporter-otlp
232
+ opentelemetry-exporter-otlp-proto-http==1.25.0
233
+ # via opentelemetry-exporter-otlp
234
+ opentelemetry-instrumentation==0.46b0
235
+ # via uptrace
236
+ opentelemetry-proto==1.25.0
237
+ # via
238
+ # opentelemetry-exporter-otlp-proto-common
239
+ # opentelemetry-exporter-otlp-proto-grpc
240
+ # opentelemetry-exporter-otlp-proto-http
241
+ opentelemetry-sdk==1.25.0
242
+ # via
243
+ # opentelemetry-exporter-otlp-proto-grpc
244
+ # opentelemetry-exporter-otlp-proto-http
245
+ # uptrace
246
+ opentelemetry-semantic-conventions==0.46b0
247
+ # via opentelemetry-sdk
248
+ orjson==3.10.6
249
+ # via langsmith
250
+ packaging==23.2
251
+ # via
252
+ # chainlit
253
+ # deprecation
254
+ # huggingface-hub
255
+ # langchain-core
256
+ # literalai
257
+ # marshmallow
258
+ # transformers
259
+ pillow==10.4.0
260
+ # via sentence-transformers
261
+ portalocker==2.10.1
262
+ # via qdrant-client
263
+ postgrest==0.16.9
264
+ # via supabase
265
+ protobuf==4.25.3
266
+ # via
267
+ # googleapis-common-protos
268
+ # grpcio-tools
269
+ # opentelemetry-proto
270
+ pydantic==2.8.2
271
+ # via
272
+ # chainlit
273
+ # fastapi
274
+ # gotrue
275
+ # langchain
276
+ # langchain-core
277
+ # langsmith
278
+ # literalai
279
+ # openai
280
+ # postgrest
281
+ # qdrant-client
282
+ pydantic-core==2.20.1
283
+ # via pydantic
284
+ pyjwt==2.8.0
285
+ # via
286
+ # chainlit
287
+ # twilio
288
+ python-dateutil==2.9.0.post0
289
+ # via
290
+ # realtime
291
+ # storage3
292
+ python-dotenv==1.0.1
293
+ # via
294
+ # -r requirements.in
295
+ # chainlit
296
+ python-engineio==4.9.1
297
+ # via python-socketio
298
+ python-multipart==0.0.9
299
+ # via chainlit
300
+ python-socketio==5.11.3
301
+ # via chainlit
302
+ pytz==2024.1
303
+ # via clickhouse-connect
304
+ pyyaml==6.0.1
305
+ # via
306
+ # huggingface-hub
307
+ # langchain
308
+ # langchain-community
309
+ # langchain-core
310
+ # transformers
311
+ qdrant-client==1.10.1
312
+ # via
313
+ # -r requirements.in
314
+ # langchain-qdrant
315
+ realtime==1.0.6
316
+ # via supabase
317
+ regex==2024.5.15
318
+ # via
319
+ # tiktoken
320
+ # transformers
321
+ requests==2.32.3
322
+ # via
323
+ # huggingface-hub
324
+ # langchain
325
+ # langchain-community
326
+ # langsmith
327
+ # opentelemetry-exporter-otlp-proto-http
328
+ # tiktoken
329
+ # transformers
330
+ # twilio
331
+ safetensors==0.4.3
332
+ # via transformers
333
+ scikit-learn==1.5.1
334
+ # via sentence-transformers
335
+ scipy==1.13.1
336
+ # via
337
+ # scikit-learn
338
+ # sentence-transformers
339
+ sentence-transformers==3.0.1
340
+ # via langchain-huggingface
341
+ simple-websocket==1.0.0
342
+ # via python-engineio
343
+ six==1.16.0
344
+ # via python-dateutil
345
+ sniffio==1.3.1
346
+ # via
347
+ # anyio
348
+ # httpx
349
+ # openai
350
+ sqlalchemy==2.0.31
351
+ # via
352
+ # langchain
353
+ # langchain-community
354
+ starlette==0.37.2
355
+ # via
356
+ # chainlit
357
+ # fastapi
358
+ storage3==0.7.7
359
+ # via supabase
360
+ strenum==0.4.15
361
+ # via postgrest
362
+ supabase==2.5.3
363
+ # via -r requirements.in
364
+ supafunc==0.4.7
365
+ # via supabase
366
+ sympy==1.13.1
367
+ # via torch
368
+ syncer==2.0.3
369
+ # via chainlit
370
+ tenacity==8.5.0
371
+ # via
372
+ # langchain
373
+ # langchain-community
374
+ # langchain-core
375
+ threadpoolctl==3.5.0
376
+ # via scikit-learn
377
+ tiktoken==0.7.0
378
+ # via langchain-openai
379
+ tokenizers==0.19.1
380
+ # via
381
+ # langchain-huggingface
382
+ # transformers
383
+ tomli==2.0.1
384
+ # via chainlit
385
+ torch==2.2.2
386
+ # via sentence-transformers
387
+ tqdm==4.66.4
388
+ # via
389
+ # huggingface-hub
390
+ # openai
391
+ # sentence-transformers
392
+ # transformers
393
+ transformers==4.42.4
394
+ # via
395
+ # langchain-huggingface
396
+ # sentence-transformers
397
+ twilio==9.2.3
398
+ # via -r requirements.in
399
+ typing-extensions==4.12.2
400
+ # via
401
+ # aiosqlite
402
+ # fastapi
403
+ # huggingface-hub
404
+ # openai
405
+ # opentelemetry-sdk
406
+ # pydantic
407
+ # pydantic-core
408
+ # realtime
409
+ # sqlalchemy
410
+ # storage3
411
+ # torch
412
+ # typing-inspect
413
+ typing-inspect==0.9.0
414
+ # via dataclasses-json
415
+ uptrace==1.24.0
416
+ # via chainlit
417
+ urllib3==2.2.2
418
+ # via
419
+ # clickhouse-connect
420
+ # qdrant-client
421
+ # requests
422
+ uvicorn==0.25.0
423
+ # via chainlit
424
+ watchfiles==0.20.0
425
+ # via chainlit
426
+ websockets==12.0
427
+ # via realtime
428
+ wrapt==1.16.0
429
+ # via
430
+ # deprecated
431
+ # opentelemetry-instrumentation
432
+ wsproto==1.2.0
433
+ # via simple-websocket
434
+ yarl==1.9.4
435
+ # via aiohttp
436
+ zipp==3.19.2
437
+ # via importlib-metadata
438
+ zstandard==0.23.0
439
+ # via clickhouse-connect
440
+
441
+ # The following packages are considered to be unsafe in a requirements file:
442
+ # setuptools
utils/db.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from supabase import Client, create_client
3
+
4
+ SUPABASE_URL: str = "https://cfivdlyedzbcvjsztebc.supabase.co"
5
+ SUPABASE_SECRET_KEY: str = os.environ["SUPABASE_SECRET_KEY"]
6
+
7
+
8
+ def get_client() -> Client:
9
+ return create_client(SUPABASE_URL, SUPABASE_SECRET_KEY)
10
+
11
+
12
+ def get_recipes(urls: list):
13
+ supabase_client = get_client()
14
+ response = (
15
+ supabase_client.table("recipes")
16
+ .select("url, metadata, features, md_ingredients, md_preparation, md_nutrition, md_description, time")
17
+ .in_("url", urls)
18
+ .limit(10)
19
+ .execute()
20
+ )
21
+
22
+ return [
23
+ {
24
+ "title": r["metadata"]["title"],
25
+ "thumbnail": r["metadata"].get("thumbnail"),
26
+ "url": r["url"],
27
+ "text": f"""
28
+ TITLE: \n
29
+ {r['metadata']['title']}
30
+ \n\n
31
+ ESTIMATED COOKING / PREPARATION TIME: {r['time']} minutes
32
+ \n\n
33
+ DESCRIPTION: \n
34
+ {r['md_description']}
35
+ \n\n
36
+ INGREDIENTS: \n
37
+ {r['md_ingredients']}
38
+ NUTRITIONAL INFORMATION: \n
39
+ {r['md_nutrition']}
40
+ \n\n
41
+ PREP INSTRUCTIONS: \n
42
+ {r['md_preparation']}
43
+
44
+ Source URL: {r['url']}
45
+ \n\n
46
+ """,
47
+ }
48
+ for r in response.data
49
+ ]
50
+
51
+
52
+ def shortlisted_recipes_to_string(recipes):
53
+ output = ""
54
+ if recipes and isinstance(recipes, list):
55
+ for index, r in enumerate(recipes):
56
+ output += f"""Suggestion #{index+1}: {r['text']} \n\n"""
57
+ return output
utils/debug.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ def retriever_output_logger(documents):
2
+ print("returning total results count: ", len(documents))
3
+ for doc in documents:
4
+ print(f"""*** {doc.metadata['title']}
5
+ > Prep Time: {doc.metadata['time']}
6
+ > Occasion: {doc.metadata['occasion']}
7
+ > Cuisine: {doc.metadata['cuisine']}
8
+ > Ingredients: {doc.metadata['ingredients']}""")
9
+ return documents
utils/graph.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import operator
3
+ from pprint import pprint
4
+ from typing import Annotated, List, TypedDict
5
+ import chainlit as cl
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from langchain.schema.runnable.config import RunnableConfig
8
+ from langchain_core.messages import AIMessageChunk, FunctionMessage
9
+ from langchain_core.utils.function_calling import convert_to_openai_function
10
+ from langgraph.checkpoint.aiosqlite import AsyncSqliteSaver
11
+ from langgraph.graph import END, START, StateGraph
12
+ from langgraph.graph.message import add_messages
13
+ from langgraph.prebuilt import ToolExecutor, ToolInvocation
14
+
15
+ from utils.tools import send_text_tool
16
+ from .db import get_recipes, shortlisted_recipes_to_string
17
+ from .graph_chains import (
18
+ get_grader_chain,
19
+ get_question_type_chain,
20
+ get_recipe_url_extractor_chain,
21
+ get_selected_recipe,
22
+ )
23
+ from .retrievers import get_self_retriever
24
+
25
+
26
+ class AgentState(TypedDict):
27
+ question: Annotated[str, operator.setitem]
28
+ question_type: str
29
+ generation: str
30
+ documents: List[str]
31
+ shortlisted_recipes: List[dict]
32
+ selected_recipe: dict
33
+ messages: Annotated[list, add_messages]
34
+
35
+
36
+ def generate_workflow(base_llm, power_llm):
37
+ def _node_question_triage(state: AgentState):
38
+ print("---TRIAGE---")
39
+ question = state["question"]
40
+ messages = state["messages"]
41
+ last_message = messages[-1] if messages else ""
42
+ shortlisted_recipes = state.get("shortlisted_recipes")
43
+ question_type_chain = get_question_type_chain(base_llm)
44
+ question_type_response = question_type_chain.invoke(
45
+ {
46
+ "question": question,
47
+ "context": shortlisted_recipes_to_string(shortlisted_recipes),
48
+ "last_message": last_message,
49
+ }
50
+ )
51
+ question_type_response_data = sorted(
52
+ [
53
+ (question_type_response.send_text, "send_sms"),
54
+ (question_type_response.asking_for_recipe_suggestions, "asking_for_recipe_suggestions"),
55
+ (question_type_response.referring_to_shortlisted_recipes, "referring_to_shortlisted_recipes"),
56
+ (question_type_response.show_specific_recipe, "show_specific_recipe"),
57
+ (question_type_response.referring_to_specific_recipe, "referring_to_specific_recipe"),
58
+ ],
59
+ key=lambda x: x[0],
60
+ reverse=True,
61
+ )
62
+
63
+ pprint(question_type_response_data)
64
+ question_type = question_type_response_data[0][1]
65
+ selected_recipe = None
66
+ if shortlisted_recipes and question_type_response.specific_recipe_url:
67
+ selected_recipe = next(
68
+ (r for r in shortlisted_recipes if r["url"] == question_type_response.specific_recipe_url)
69
+ )
70
+ print("set selected recipe", question_type_response.specific_recipe_url)
71
+ return {"question_type": question_type, "selected_recipe": selected_recipe}
72
+
73
+ async def _node_call_retriever(state: AgentState, config):
74
+ print("---RETRIEVE---")
75
+ cl_msg = config["configurable"]["cl_msg"]
76
+ await cl_msg.stream_token("Searching for recipes matching your criteria ... \n\n")
77
+ question = state["question"]
78
+ vector_db_chain = get_self_retriever(base_llm)
79
+ # Retrieval
80
+ documents = vector_db_chain.invoke(question, return_only_outputs=False)
81
+ print("WOW: ", vector_db_chain.search_kwargs)
82
+ return {"documents": documents, "question": question}
83
+
84
+ async def _node_grade_recipes(state: AgentState, config):
85
+ print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
86
+ cl_msg = config["configurable"]["cl_msg"]
87
+ question = state["question"]
88
+ documents = state["documents"]
89
+ await cl_msg.stream_token(
90
+ f"Evaluating the relevance of {len(documents)} retrieved recipes based on your criteria ... \n\n"
91
+ )
92
+
93
+ retrieval_grader = get_grader_chain(base_llm)
94
+
95
+ # Score each doc
96
+ filtered_docs = []
97
+ for d in documents:
98
+ grader_output = retrieval_grader.invoke({"question": question, "document": d.page_content})
99
+ binary_score = grader_output.binary_score
100
+ score = grader_output.integer_score
101
+
102
+ if binary_score == "yes":
103
+ print("---GRADE: DOCUMENT RELEVANT---: ", score, d.metadata["url"])
104
+ d.metadata["score"] = score
105
+ filtered_docs.append(d)
106
+ else:
107
+ print("---GRADE: DOCUMENT NOT RELEVANT---", score, d.metadata["url"])
108
+ continue
109
+ num_eliminated_docs = len(documents) - len(filtered_docs)
110
+ if num_eliminated_docs > 0:
111
+ await cl_msg.stream_token(
112
+ f"Eliminated {num_eliminated_docs} recipes that were not relevant based on your criteria ... \n\n"
113
+ )
114
+ return {"documents": filtered_docs, "question": question}
115
+
116
+ async def _node_generate_response(state: AgentState, config):
117
+ """
118
+ Determines whether the retrieved recipes are relevant to the question.
119
+
120
+ Args:
121
+ state (messages): The current state
122
+
123
+ Returns:
124
+ str: A decision for whether the documents are relevant or not
125
+ """
126
+
127
+ print("--- GENERATING SHORTLIST ---")
128
+
129
+ question = state["question"]
130
+ documents = state["documents"]
131
+
132
+ # LLM with tool and validation
133
+ base_rag_prompt_template = """\
134
+ You are a friendly AI assistant. Using the provided context,
135
+ please answer the user's question in a friendly, conversational tone.
136
+
137
+ Based on the context provided, please select the top 3 receipes that best fits criteria
138
+ outlined in the question. It doesn't need to be a perfect match but just get the most suitable.
139
+
140
+ For each option, provide the following information:
141
+ 1. A brief description of the recipe
142
+ 2. The URL of the recipe
143
+ 3. The ratings and number of ratings
144
+ Only if question includes a criteria for recipes that are good for a specific occassion, please also provide the occassion(s) of the recipe,
145
+ Only if question includes a criteria a type of cuisine, please also provide the cuisines associated with the recipe.
146
+ Only if question includes a criteria a type of diet, please also provide the diet(s) associated with the recipe.
147
+
148
+ If the context is empty, please be careful to note to the user that there are no recipes matching those specific requirements and do NOT provide any other recipes as suggestions.
149
+
150
+ Context:
151
+ {context}
152
+
153
+ Question:
154
+ {question}
155
+ """
156
+
157
+ base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)
158
+
159
+ chain = base_rag_prompt | power_llm
160
+ full_response = ""
161
+ cl_msg = config["configurable"]["cl_msg"]
162
+ async for chunk in chain.astream(
163
+ {"question": question, "context": documents},
164
+ config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
165
+ ):
166
+ if isinstance(chunk, AIMessageChunk):
167
+ await cl_msg.stream_token(chunk.content)
168
+ full_response += chunk.content
169
+ url_extractor = get_recipe_url_extractor_chain(base_llm)
170
+ url_extractor_results = url_extractor.invoke({"context": full_response})
171
+
172
+ shortlisted_recipes = None
173
+ if isinstance(url_extractor_results.urls, list) and len(url_extractor_results.urls):
174
+ shortlisted_recipes = get_recipes(url_extractor_results.urls)
175
+ return {
176
+ "documents": documents,
177
+ "question": question,
178
+ "shortlisted_recipes": shortlisted_recipes,
179
+ "messages": [full_response],
180
+ }
181
+
182
+ async def _node_shortlist_qa(state: AgentState, config):
183
+ print("--- Q&A with SHORTLISTED RECIPES ---")
184
+
185
+ question = state["question"]
186
+ shortlisted_recipes = state["shortlisted_recipes"]
187
+ messages = state["messages"]
188
+ last_message = messages[-1] if messages else ""
189
+
190
+ # LLM with tool and validation
191
+ base_rag_prompt_template = """\
192
+ You are a friendly AI assistant. Using only the provided context,
193
+ please answer the user's question in a friendly, conversational tone.
194
+ If you don't know the answer based on the context, say you don't know.
195
+
196
+ Context:
197
+ {context}
198
+
199
+ Last message provided to the user:
200
+ {last_message}
201
+
202
+ Question:
203
+ {question}
204
+ """
205
+
206
+ base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)
207
+ chain = base_rag_prompt | power_llm
208
+
209
+ full_response = ""
210
+ cl_msg = config["configurable"]["cl_msg"]
211
+ async for chunk in chain.astream(
212
+ {
213
+ "question": question,
214
+ "context": shortlisted_recipes_to_string(shortlisted_recipes),
215
+ "last_message": last_message,
216
+ },
217
+ config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
218
+ ):
219
+ if isinstance(chunk, AIMessageChunk):
220
+ await cl_msg.stream_token(chunk.content)
221
+ full_response += chunk.content
222
+
223
+ selected_recipe = get_selected_recipe(base_llm, question, shortlisted_recipes, messages)
224
+
225
+ return {"messages": [full_response], "selected_recipe": selected_recipe}
226
+
227
+ async def _node_single_recipe_qa(state: AgentState, config):
228
+ print("--- Q&A with SINGLE RECIPE ---")
229
+
230
+ question = state["question"]
231
+ selected_recipe = state.get("selected_recipe")
232
+ messages = state["messages"]
233
+ last_message = messages[-1] if messages else ""
234
+
235
+ # LLM with tool and validation
236
+ base_rag_prompt_template = """\
237
+ You are a friendly AI assistant. Using only the provided context,
238
+ please answer the user's question in a friendly, conversational tone.
239
+ If you don't know the answer based on the context, say you don't know.
240
+
241
+ Context:
242
+ {context}
243
+
244
+ Last message provided to the user:
245
+ {last_message}
246
+
247
+ Question:
248
+ {question}
249
+ """
250
+
251
+ base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)
252
+ power_llm_with_tool = power_llm.bind_functions([convert_to_openai_function(send_text_tool)])
253
+ chain = base_rag_prompt | power_llm_with_tool
254
+ full_response = ""
255
+ cl_msg = config["configurable"]["cl_msg"]
256
+
257
+ async for chunk in chain.astream(
258
+ {"question": question, "context": selected_recipe["text"], "last_message": last_message},
259
+ config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
260
+ ):
261
+ if isinstance(chunk, AIMessageChunk):
262
+ await cl_msg.stream_token(chunk.content)
263
+ full_response += chunk.content
264
+
265
+ return {"messages": [full_response]}
266
+
267
+ async def _node_send_sms(state: AgentState, config):
268
+ print("--- SEND SMS ---")
269
+
270
+ question = state["question"]
271
+ selected_recipe = state.get("selected_recipe")
272
+ messages = state["messages"]
273
+ last_message = messages[-1] if messages else ""
274
+
275
+ # LLM with tool and validation
276
+ base_rag_prompt_template = """\
277
+ You are a friendly AI assistant.
278
+ Using only the provided context and the tool,
279
+ please fullfill the user's request to send an SMS text
280
+
281
+ Context:
282
+ {context}
283
+
284
+ Last message provided to the user:
285
+ {last_message}
286
+
287
+ Question:
288
+ {question}
289
+ """
290
+
291
+ base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)
292
+ # tool_functions =
293
+ power_llm_with_tool = power_llm.bind_functions([convert_to_openai_function(send_text_tool)])
294
+ chain = base_rag_prompt | power_llm_with_tool
295
+
296
+ tool_executor = ToolExecutor([send_text_tool])
297
+ message = chain.invoke(
298
+ {
299
+ "question": question,
300
+ "context": selected_recipe.get("text") if selected_recipe else "",
301
+ "last_message": last_message,
302
+ },
303
+ )
304
+
305
+ print("message", message)
306
+
307
+ action = ToolInvocation(
308
+ tool=message.additional_kwargs["function_call"]["name"],
309
+ tool_input=json.loads(message.additional_kwargs["function_call"]["arguments"]),
310
+ )
311
+
312
+ response = tool_executor.invoke(action)
313
+
314
+ function_message = FunctionMessage(content=str(response), name=action.tool)
315
+
316
+ return {"messages": [function_message]}
317
+
318
+ workflow = StateGraph(AgentState)
319
+
320
+ # Define the nodes
321
+ workflow.add_node("triage", _node_question_triage) # retrieve
322
+ workflow.add_node("retrieve", _node_call_retriever) # retrieve
323
+ workflow.add_node("grade_recipes", _node_grade_recipes) # grade documents
324
+ workflow.add_node("generate", _node_generate_response) # generatae
325
+ workflow.add_node("shortlist_qa", _node_shortlist_qa) # answer questions about shortlisted recipes
326
+ workflow.add_node("single_qa", _node_single_recipe_qa) # answer questions about shortlisted recipes
327
+ workflow.add_node("send_sms", _node_send_sms) # answer questions about shortlisted recipes
328
+
329
+ # Define the edges
330
+
331
+ def _edge_route_question(state: AgentState):
332
+ print("=======EDGE: START =====")
333
+ question_type = state["question_type"]
334
+ messages = state["messages"]
335
+ shortlisted_recipes = state.get("shortlisted_recipes")
336
+ selected_recipe = state.get("selected_recipe")
337
+
338
+ # if not shortlisted_recipes or len(shortlisted_recipes) == 0:
339
+ # print("going to retrieve since no shortlisted_recipes")
340
+ # return "retrieve"
341
+ if question_type == "asking_for_recipe_suggestions":
342
+ return "retrieve"
343
+ if question_type in ["referring_to_shortlisted_recipes", "show_specific_recipe"]:
344
+ return "shortlist_qa"
345
+ if question_type == "referring_to_specific_recipe" and selected_recipe:
346
+ return "single_qa"
347
+ if question_type == "send_sms":
348
+ return "send_sms"
349
+
350
+ print("defaulting to shortlist_qa")
351
+ return "shortlist_qa"
352
+
353
+ workflow.add_edge(START, "triage")
354
+ workflow.add_conditional_edges(
355
+ "triage",
356
+ _edge_route_question,
357
+ {
358
+ "shortlist_qa": "shortlist_qa",
359
+ "single_qa": "single_qa",
360
+ "retrieve": "retrieve",
361
+ "send_sms": "send_sms",
362
+ },
363
+ )
364
+
365
+ workflow.add_edge("retrieve", "grade_recipes")
366
+ workflow.add_edge("grade_recipes", "generate")
367
+ workflow.add_edge("generate", END)
368
+ workflow.add_edge("shortlist_qa", END)
369
+ workflow.add_edge("single_qa", END)
370
+ workflow.add_edge("send_sms", END)
371
+
372
+ memory = AsyncSqliteSaver.from_conn_string(":memory:")
373
+
374
+ app = workflow.compile(checkpointer=memory)
375
+
376
+ return app
utils/graph_chains.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.output_parsers import PydanticOutputParser
2
+ from langchain_core.prompts import ChatPromptTemplate
3
+ from langchain_core.pydantic_v1 import BaseModel, Field
4
+ from .db import shortlisted_recipes_to_string
5
+
6
+
7
+ def get_grader_chain(llm_model):
8
+ class GradeRecipes(BaseModel):
9
+ """Binary score for relevance check on retrieved documents."""
10
+
11
+ binary_score: str = Field(
12
+ description="Document representing recipes are generally relevant to the criteria in the question, 'yes' or 'no'"
13
+ )
14
+
15
+ integer_score: int = Field(
16
+ description="Degree to which Documents are relevant to the question, integers from 1 to 100"
17
+ )
18
+
19
+ # LLM with function call
20
+ structured_llm_grader = llm_model.with_structured_output(GradeRecipes)
21
+
22
+ # Prompt
23
+ system = """You are a grader assessing relevance of a retrieved cooking recipe document to a user question. \n
24
+ It does not need to be a stringent test. The goal is to filter out completely erroneous or irrelevant retrievals. \n
25
+ If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
26
+ Give a binary score 'yes' or 'no' score to indicate whether the recipe document is relevant to the question.
27
+ Also give a integer score from 1 to 100 to indicate the degree to which the recipe document is relevant to the question.
28
+ """
29
+
30
+ grade_prompt = ChatPromptTemplate.from_messages(
31
+ [
32
+ ("system", system),
33
+ ("human", "Retrieved recipe document: \n\n {document} \n\n User question: {question}"),
34
+ ]
35
+ )
36
+
37
+ retrieval_grader = grade_prompt | structured_llm_grader
38
+
39
+ return retrieval_grader
40
+
41
+
42
+ def get_recipe_url_extractor_chain(llm_model):
43
+ class RecipeUrlsSchema(BaseModel):
44
+ urls: list[str] = Field(description="A list of urls pointing to specific recipes")
45
+
46
+ structured_llm_grader = llm_model.with_structured_output(RecipeUrlsSchema)
47
+
48
+ pydantic_parser = PydanticOutputParser(pydantic_object=RecipeUrlsSchema)
49
+ format_instructions = pydantic_parser.get_format_instructions()
50
+
51
+ RECIPE_SEARCH_PROMPT = """
52
+ Your goal is to understand and parse out the full http urls in the context corresponding to each recipe.
53
+
54
+ {format_instructions}
55
+
56
+ Context:
57
+ {context}
58
+ """
59
+
60
+ prompt = ChatPromptTemplate.from_template(
61
+ template=RECIPE_SEARCH_PROMPT, partial_variables={"format_instructions": format_instructions}
62
+ )
63
+
64
+ retriever = prompt | structured_llm_grader
65
+
66
+ return retriever
67
+
68
+
69
+ def get_recipe_selection_chain(llm_model):
70
+ class RecipeSelectionSchema(BaseModel):
71
+ asking_for_recipe_suggestions: str = Field(
72
+ description="Whether the User Question is asking for recipe suggestions based on some criteria, 'yes' or 'no'"
73
+ )
74
+ referring_to_specific_recipe: str = Field(
75
+ description="Whether the User Question is asking about one specific recipe (but NOT asking to just show a specific recipe), 'yes' or 'no'"
76
+ )
77
+ referring_to_shortlisted_recipes: str = Field(
78
+ description="Whether the User Question is asking generally about the 3 shortlisted recipes, 'yes' or 'no'"
79
+ )
80
+
81
+ show_specific_recipe: str = Field(
82
+ description="Whether the User Question is asking asking to show a specific recipe, 'yes' or 'no'"
83
+ )
84
+
85
+ specific_recipe_url: str = Field(
86
+ description="URL of the specific recipe that the User Question is directed to, if any "
87
+ )
88
+
89
+ # LLM with function call
90
+ structured_llm_grader = llm_model.with_structured_output(RecipeSelectionSchema)
91
+ pydantic_parser = PydanticOutputParser(pydantic_object=RecipeSelectionSchema)
92
+ format_instructions = pydantic_parser.get_format_instructions()
93
+
94
+ # Prompt
95
+ RECIPE_SELECTION_PROMPT = """
96
+ You are a helpful assistant attempting to categorize the nature of the User question
97
+ based on the last message sent to he user and the provided context.
98
+
99
+ {format_instructions}
100
+
101
+ User Question:
102
+ {question}
103
+
104
+ Last message provided to the user:
105
+ {last_message}
106
+
107
+ Context:
108
+ {context}
109
+
110
+
111
+ """
112
+
113
+ prompt = ChatPromptTemplate.from_template(
114
+ template=RECIPE_SELECTION_PROMPT, partial_variables={"format_instructions": format_instructions}
115
+ )
116
+
117
+ chain = prompt | structured_llm_grader
118
+
119
+ return chain
120
+
121
+
122
+ def get_question_type_chain(llm_model):
123
+ class RecipeSelectionChanceSchema(BaseModel):
124
+ asking_for_recipe_suggestions: int = Field(
125
+ description="The likelihood / chance that the User Question is asking for recipe suggestions based on some criteria, integers from 1 to 100"
126
+ )
127
+ referring_to_specific_recipe: int = Field(
128
+ description="The likelihood / chance that the User Question is asking questions about one specific full recipe, integers from 1 to 100"
129
+ )
130
+ referring_to_shortlisted_recipes: int = Field(
131
+ description="The likelihood / chance that the User Question is asking generally about shortlisted recipes provided in the last message, integers from 1 to 100"
132
+ )
133
+
134
+ show_specific_recipe: int = Field(
135
+ description="The likelihood / chance that the User Question is asking to show a specific recipe, integers from 1 to 100"
136
+ )
137
+ send_text: int = Field(
138
+ description="The likelihood / chance that the User Question is to send a SMS or text, integers from 1 to 100"
139
+ )
140
+
141
+ specific_recipe_url: str = Field(
142
+ description="URL of the specific recipe that the User Question is directed to, if any "
143
+ )
144
+
145
+ # LLM with function call
146
+ structured_llm_grader = llm_model.with_structured_output(RecipeSelectionChanceSchema)
147
+ pydantic_parser = PydanticOutputParser(pydantic_object=RecipeSelectionChanceSchema)
148
+ format_instructions = pydantic_parser.get_format_instructions()
149
+
150
+ # Prompt
151
+ RECIPE_SELECTION_PROMPT = """
152
+ You are a helpful assistant attempting to categorize the nature of the User question
153
+ based on the last message sent to he user and the provided context.
154
+ Note that if there were recipe suggesetions in the last message provided to the user,
155
+ it is highly likely that the user is asking questions referring to shortlisted recipes.
156
+ If the last message was a full single recipe, it is generally likely that the user
157
+ is asking questions referring to specific recipe.
158
+
159
+ {format_instructions}
160
+
161
+ User Question:
162
+ {question}
163
+
164
+ Last message provided to the user:
165
+ {last_message}
166
+
167
+ Context:
168
+ {context}
169
+
170
+
171
+ """
172
+
173
+ prompt = ChatPromptTemplate.from_template(
174
+ template=RECIPE_SELECTION_PROMPT, partial_variables={"format_instructions": format_instructions}
175
+ )
176
+
177
+ chain = prompt | structured_llm_grader
178
+
179
+ return chain
180
+
181
+
182
+ def get_selected_recipe(llm_model, question, shortlisted_recipes, messages):
183
+ selected_recipe = None
184
+ recipe_selection_chain = get_recipe_selection_chain(llm_model)
185
+ recipe_selection_response = recipe_selection_chain.invoke(
186
+ {
187
+ "question": question,
188
+ "context": shortlisted_recipes_to_string(shortlisted_recipes),
189
+ "last_message": messages[-1] if messages else "",
190
+ }
191
+ )
192
+
193
+ if (
194
+ recipe_selection_response.referring_to_specific_recipe == "yes"
195
+ and recipe_selection_response.specific_recipe_url
196
+ ):
197
+ selected_recipe = next(
198
+ (r for r in shortlisted_recipes if r["url"] == recipe_selection_response.specific_recipe_url)
199
+ )
200
+ return selected_recipe
utils/metadata.py ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUISINES = [
2
+ "Soul Food",
3
+ "South American",
4
+ "South Asian",
5
+ "Southern",
6
+ "Southwestern",
7
+ "Spanish",
8
+ "Tex Mex",
9
+ "Thai",
10
+ "Turkish",
11
+ "Venezuelan",
12
+ "Vietnamese",
13
+ "West African",
14
+ "African",
15
+ "American",
16
+ "Asian",
17
+ "Australian",
18
+ "Austrian",
19
+ "Belgian",
20
+ "Brazilian",
21
+ "British",
22
+ "Cajun",
23
+ "Caribbean",
24
+ "Central ",
25
+ "Chinese",
26
+ "Creole",
27
+ "Cuban",
28
+ "Eastern",
29
+ "Filipino",
30
+ "French",
31
+ "German",
32
+ "Greek",
33
+ "Indian",
34
+ "Indonesian",
35
+ "Iranian",
36
+ "Irish",
37
+ "Italian",
38
+ "Italian ",
39
+ "Jamaican",
40
+ "Japanese",
41
+ "Jewish",
42
+ "Korean",
43
+ "Latin American",
44
+ "Lebanese",
45
+ "Malaysian",
46
+ "Mediterranean",
47
+ "Mexican",
48
+ "Middle Eastern",
49
+ "Moroccan",
50
+ "New England",
51
+ "Nordic",
52
+ "Pakistani",
53
+ "Portuguese",
54
+ "Russian",
55
+ ]
56
+
57
+ OCCASIONS = [
58
+ "School Lunch",
59
+ "Snack",
60
+ "Spring",
61
+ "St. Patrick's Day",
62
+ "Summer",
63
+ "Super Bowl",
64
+ "Thanksgiving",
65
+ "Thanksgiving Leftover",
66
+ "Valentine's Day",
67
+ "Weekday",
68
+ "Weeknight",
69
+ "Appetizer",
70
+ "Barbecues",
71
+ "Birthday",
72
+ "Breakfast",
73
+ "Brunch",
74
+ "Budget",
75
+ "Christmas",
76
+ "Christmas Cookie",
77
+ "Date Night",
78
+ "Dessert",
79
+ "Dinner",
80
+ "Diwali",
81
+ "Easter",
82
+ "Fall",
83
+ "Father's Day",
84
+ "For One",
85
+ "For Two",
86
+ "Fourth Of July",
87
+ "Halloween",
88
+ "Hanukkah",
89
+ "Kentucky Derby",
90
+ "Kwanzaa",
91
+ "Labor Day",
92
+ "Lunar New Year",
93
+ "Lunch",
94
+ "Main Course",
95
+ "Make Ahead",
96
+ "Mardi Gras",
97
+ "Memorial Day",
98
+ "Mother's Day",
99
+ "New Year's Day",
100
+ "Nowruz",
101
+ "Party",
102
+ "Passover",
103
+ "Picnic",
104
+ "Plan Ahead",
105
+ "Quick",
106
+ "Ramadan",
107
+ "Rosh Hashana",
108
+ ]
109
+
110
+ DIETS = [
111
+ "Vegan",
112
+ "Vegetarian",
113
+ "Dairy Free",
114
+ "Gluten Free",
115
+ "Halal",
116
+ "Healthy",
117
+ "High Fiber",
118
+ "High Protein",
119
+ "Kid Friendly",
120
+ "Kosher",
121
+ "Low Calorie",
122
+ "Low Carb",
123
+ "Low Fat",
124
+ "Low Sodium",
125
+ "Low Sugar",
126
+ "Nut Free",
127
+ "Raw",
128
+ ]
129
+
130
+ EQUIPMENT = [
131
+ "Sheet Pan"
132
+ "Skillet"
133
+ "Slow Cooker"
134
+ "Stovetop"
135
+ "Air Fryer"
136
+ "Barbecue"
137
+ "Cast Iron"
138
+ "Dutch Oven"
139
+ "Food Processor"
140
+ "Freezer Friendly"
141
+ ]
142
+
143
+ KEY_INGREDIENTS = [
144
+ "Anchovy",
145
+ "Andouille Sausage",
146
+ "Apple",
147
+ "Apple Cider",
148
+ "Applesauce",
149
+ "Arborio Rice",
150
+ "Arctic Char",
151
+ "Artichoke",
152
+ "Artichoke Heart",
153
+ "Arugula",
154
+ "Asparagus",
155
+ "Avocado",
156
+ "Baby Carrot",
157
+ "Bacon",
158
+ "Balsamic Vinegar",
159
+ "Banana",
160
+ "Basil",
161
+ "Bay Leaves",
162
+ "Bay Scallop",
163
+ "Beans",
164
+ "Beef",
165
+ "Beef Broth",
166
+ "Beer",
167
+ "Beet",
168
+ "Bell Pepper",
169
+ "Black Bean",
170
+ "Black Eyed Pea",
171
+ "Black Olives",
172
+ "Black Truffle",
173
+ "Blueberry",
174
+ "Bok Choy",
175
+ "Brie",
176
+ "Brisket",
177
+ "Broccoli",
178
+ "Broccoli Rabe",
179
+ "Broccolini",
180
+ "Brown Rice",
181
+ "Brussels Sprout",
182
+ "Butter",
183
+ "Butter Lettuce",
184
+ "Butternut Squash",
185
+ "Cabbage",
186
+ "Canned Tuna",
187
+ "Cannellini Bean",
188
+ "Cantaloupe",
189
+ "Caper",
190
+ "Carrot",
191
+ "Cashew",
192
+ "Catfish",
193
+ "Cauliflower",
194
+ "Caviar",
195
+ "Celery",
196
+ "Chanterelle Mushroom",
197
+ "Cheddar",
198
+ "Cheese",
199
+ "Cherry",
200
+ "Cherry Tomato",
201
+ "Chia Seed",
202
+ "Chicken",
203
+ "Chicken Breast",
204
+ "Chicken Broth",
205
+ "Chicken Wing",
206
+ "Chickpea",
207
+ "Chive",
208
+ "Chocolate Chip",
209
+ "Chorizo",
210
+ "Cilantro",
211
+ "Cinnamon",
212
+ "Cinnamon Stick",
213
+ "Citrus",
214
+ "Clam",
215
+ "Coconut Cream",
216
+ "Coconut Flake",
217
+ "Coconut Milk",
218
+ "Cod",
219
+ "Coffee",
220
+ "Collard Green",
221
+ "Corn",
222
+ "Corn On The Cob",
223
+ "Cornbread",
224
+ "Cornish Hen",
225
+ "Cottage Cheese",
226
+ "Couscous",
227
+ "Crab",
228
+ "Cream",
229
+ "Cream Cheese",
230
+ "Creme Fraiche",
231
+ "Cucumber",
232
+ "Daikon",
233
+ "Dill",
234
+ "Duck",
235
+ "Duck Breast",
236
+ "Duck Fat",
237
+ "Duck Leg",
238
+ "Edamame",
239
+ "Egg",
240
+ "Egg White",
241
+ "Egg Yolk",
242
+ "Eggplant",
243
+ "Endive",
244
+ "Extra Virgin Olive Oil",
245
+ "Farro",
246
+ "Feta",
247
+ "Fettuccine",
248
+ "Fish",
249
+ "Fruit",
250
+ "Garlic",
251
+ "Ginger",
252
+ "Grape",
253
+ "Grape Tomato",
254
+ "Grapefruit",
255
+ "Greek Yogurt",
256
+ "Green Olives",
257
+ "Ground Beef",
258
+ "Ground Meat",
259
+ "Guacamole",
260
+ "Halibut",
261
+ "Ham",
262
+ "Haricot Vert",
263
+ "Harissa",
264
+ "Hazelnut",
265
+ "Hearts Of Palm",
266
+ "Heirloom Tomato",
267
+ "Herb",
268
+ "Honey",
269
+ "Hot Dog",
270
+ "Hummus",
271
+ "Iceberg Lettuce",
272
+ "Italian Bread",
273
+ "Italian Parsley",
274
+ "Italian Sausage",
275
+ "Jalapeno",
276
+ "Kalamata Olive",
277
+ "Kale",
278
+ "Lamb",
279
+ "Lamb Chop",
280
+ "Leek",
281
+ "Lemon",
282
+ "Lettuce",
283
+ "Lime",
284
+ "Lime Juice",
285
+ "Linguine",
286
+ "Littleneck Clam",
287
+ "Lobster",
288
+ "Mackerel",
289
+ "Mango",
290
+ "Maple Syrup",
291
+ "Matcha",
292
+ "Mayonnaise",
293
+ "Melon",
294
+ "Mesclun",
295
+ "Milk",
296
+ "Mint",
297
+ "Mirin",
298
+ "Miso",
299
+ "Miso Paste",
300
+ "Mozzarella",
301
+ "Mushroom",
302
+ "Mustard Green",
303
+ "Napa Cabbage",
304
+ "Noodles",
305
+ "Octopus",
306
+ "Okra",
307
+ "Olive",
308
+ "Olive Oil",
309
+ "Onion",
310
+ "Orange",
311
+ "Oyster",
312
+ "Oyster Mushroom",
313
+ "Pancetta",
314
+ "Parmesan",
315
+ "Parsley",
316
+ "Parsnip",
317
+ "Pasta",
318
+ "Pea",
319
+ "Pea Shoot",
320
+ "Peach",
321
+ "Peanut",
322
+ "Peanut Butter",
323
+ "Pear",
324
+ "Pecan",
325
+ "Pecan Pie",
326
+ "Pecorino",
327
+ "Penne",
328
+ "Pepper",
329
+ "Pine Nut",
330
+ "Pineapple",
331
+ "Pinto Bean",
332
+ "Pita",
333
+ "Pomegranate",
334
+ "Pork",
335
+ "Pork Belly",
336
+ "Pork Tenderloin",
337
+ "Portobello",
338
+ "Potato",
339
+ "Prosciutto",
340
+ "Pumpkin",
341
+ "Quinoa",
342
+ "Red Onion",
343
+ "Rice",
344
+ "Ricotta",
345
+ "Risotto",
346
+ "Romaine",
347
+ "Rosemary",
348
+ "Salmon",
349
+ "Sausage",
350
+ "Sausages",
351
+ "Scallion",
352
+ "Scallop",
353
+ "Sea Bass",
354
+ "Short Rib",
355
+ "Shrimp",
356
+ "Snow Pea",
357
+ "Soba Noodle",
358
+ "Soda",
359
+ "Sour Cream",
360
+ "Spaghetti",
361
+ "Spaghetti Squash",
362
+ "Spinach",
363
+ "Squash",
364
+ "Steak",
365
+ "Strawberry",
366
+ "Sun Dried Tomato",
367
+ "Sunchoke",
368
+ "Sweet Potato",
369
+ "Swiss Chard",
370
+ "Swordfish",
371
+ "Thyme",
372
+ "Tofu",
373
+ "Tomatillo",
374
+ "Tomato",
375
+ "Tomato Paste",
376
+ "Tomato Sauce",
377
+ "Tuna",
378
+ "Turkey",
379
+ "Veal",
380
+ "Vinegar",
381
+ "Watermelon",
382
+ "Wine",
383
+ "Yellow Onion",
384
+ "Yogurt",
385
+ "Zucchini",
386
+ ]
387
+
388
+
389
+ DRINK_INGREDIENTS = [
390
+ "Campari",
391
+ "Candied Ginger",
392
+ "Aperol",
393
+ "Apple Brandy",
394
+ "Apple Juice",
395
+ "Basil",
396
+ "Beer",
397
+ "Carrot Juice",
398
+ "Club Soda",
399
+ "Champagne",
400
+ "Cider",
401
+ "Cognac",
402
+ "Cointreau",
403
+ "Cynar",
404
+ "Dry Vermouth",
405
+ "Gin",
406
+ "Ginger Ale",
407
+ "Ginger Beer",
408
+ "Grand Marnier",
409
+ "Grapefruit Juice",
410
+ "Grappa",
411
+ "Irish Whiskey",
412
+ "Lime",
413
+ "Lime Juice",
414
+ "Mezcal",
415
+ "Pineapple Juice",
416
+ "Port",
417
+ "Prosecco",
418
+ "Ruby Port",
419
+ "Rum",
420
+ "Rye",
421
+ "Rye Whiskey",
422
+ "Scotch",
423
+ "Sparkling Wine",
424
+ "Tequila",
425
+ "Tonic Water",
426
+ "Triple Sec",
427
+ "Vermouth",
428
+ "Vodka",
429
+ "Whiskey",
430
+ "White Wine",
431
+ ]
utils/retrievers.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import List
from langchain_core.documents import Document
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers import EnsembleRetriever
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_community.vectorstores import (
    MyScale,
    MyScaleSettings,
)
from langchain_community.vectorstores.qdrant import Qdrant
from langchain_core.callbacks.manager import (
    CallbackManagerForRetrieverRun,
)
from langchain_openai.embeddings import OpenAIEmbeddings
# NOTE: this import shadows the langchain_community Qdrant above; the
# langchain_qdrant class is the one actually used below.
from langchain_qdrant.vectorstores import Qdrant

from .metadata import CUISINES, DIETS, EQUIPMENT, KEY_INGREDIENTS, OCCASIONS

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# Bug fix: these two constants were commented out, but get_ensemble_retriever()
# still references them — calling it raised NameError. Restored.
QDRANT_CLOUD_KEY = os.environ.get("QDRANT_CLOUD_KEY")
QDRANT_CLOUD_URL = "https://30591e3d-7092-41c4-95e1-4d3c7ef6e894.us-east4-0.gcp.cloud.qdrant.io"


# Shared embedding model for every vector index in this module.
base_embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=OPENAI_API_KEY)
27
+
28
+
29
+ def get_ensemble_retriever():
30
+ # Use a Qdrant VectorStore to embed and store our data
31
+ qdrant_descriptions = Qdrant.from_existing_collection(
32
+ embedding=base_embeddings_model,
33
+ # 3 vector indices - recipe_descriptions, recipe_nutrition, recipe_ingredients
34
+ collection_name="recipe_descriptions",
35
+ url=QDRANT_CLOUD_URL,
36
+ api_key=QDRANT_CLOUD_KEY,
37
+ )
38
+
39
+ qdrant_nutrition = Qdrant.from_existing_collection(
40
+ embedding=base_embeddings_model,
41
+ collection_name="recipe_nutrition",
42
+ url=QDRANT_CLOUD_URL,
43
+ api_key=QDRANT_CLOUD_KEY,
44
+ )
45
+
46
+ qdrant_ingredients = Qdrant.from_existing_collection(
47
+ embedding=base_embeddings_model,
48
+ collection_name="recipe_ingredients",
49
+ url=QDRANT_CLOUD_URL,
50
+ api_key=QDRANT_CLOUD_KEY,
51
+ )
52
+
53
+ # Convert retrieved documents to JSON-serializable format
54
+ descriptions_retriever = qdrant_descriptions.as_retriever(search_kwargs={"k": 20})
55
+ nutrition_retriever = qdrant_nutrition.as_retriever(search_kwargs={"k": 20})
56
+ ingredients_retriever = qdrant_ingredients.as_retriever(search_kwargs={"k": 20})
57
+
58
+ ensemble_retriever = EnsembleRetriever(
59
+ retrievers=[
60
+ descriptions_retriever,
61
+ nutrition_retriever,
62
+ ingredients_retriever,
63
+ ],
64
+ weights=[
65
+ 0.5,
66
+ 0.25,
67
+ 0.25,
68
+ ],
69
+ )
70
+
71
+ return ensemble_retriever
72
+
73
+
74
+ def _list_to_string(l: list) -> str:
75
+ return ", ".join([f"`{item}`" for item in l])
76
+
77
+
78
+ class ModifiedSelfQueryRetriever(SelfQueryRetriever):
79
+ def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
80
+ """Get documents relevant for a query.
81
+
82
+ Args:
83
+ query: string to find relevant documents for
84
+
85
+ Returns:
86
+ List of relevant documents
87
+ """
88
+ structured_query = self.query_constructor.invoke(
89
+ {"query": query}, config={"callbacks": run_manager.get_child()}
90
+ )
91
+ # if self.verbose:
92
+ # logger.info(f"Generated Query: {structured_query}")
93
+
94
+ new_query, search_kwargs = self._prepare_query(query, structured_query)
95
+
96
+ print("search_kwargs", search_kwargs)
97
+ self.search_kwargs = search_kwargs
98
+
99
+ docs = self._get_docs_with_query(new_query, search_kwargs)
100
+ return docs
101
+
102
+
103
+ def get_self_retriever(llm_model):
104
+ metadata_field_info = [
105
+ AttributeInfo(
106
+ name="cuisine",
107
+ description="The national / ethnic cuisine categories of the recipe."
108
+ f"It should be one of {_list_to_string(CUISINES)}. "
109
+ "It only supports contain comparisons. "
110
+ f"Here are some examples: contain (cuisine, '{CUISINES[0]}')",
111
+ type="list[string]",
112
+ ),
113
+ AttributeInfo(
114
+ name="diet",
115
+ description="The diets / dietary restrictions satisfied by this recipe."
116
+ f"It should be one of {_list_to_string(DIETS)}. "
117
+ "It only supports contain comparisons. "
118
+ f"Here are some examples: contain (diet, '{DIETS[0]}')",
119
+ type="list[string]",
120
+ ),
121
+ AttributeInfo(
122
+ name="equipment",
123
+ description="The equipment required by this recipe."
124
+ f"It should be one of {_list_to_string(EQUIPMENT)}. "
125
+ "It only supports contain comparisons. "
126
+ f"Here are some examples: contain (equipment, '{EQUIPMENT[0]}')",
127
+ type="list[string]",
128
+ ),
129
+ AttributeInfo(
130
+ name="occasion",
131
+ description="The occasions, holidays, celebrations that are well suited for this recipe."
132
+ f"It should be one of {_list_to_string(OCCASIONS)}. "
133
+ "It only supports contain comparisons. "
134
+ f"Here are some examples: contain (occasion, '{OCCASIONS[0]}')",
135
+ type="list[string]",
136
+ ),
137
+ # AttributeInfo(
138
+ # name="ingredients",
139
+ # description="The ingredients used to make this recipe."
140
+ # f"It should be one of {_list_to_string(KEY_INGREDIENTS)}"
141
+ # "It only supports contain comparisons. "
142
+ # f"Here are some examples: contain (ingredients, '{KEY_INGREDIENTS[0]}')",
143
+ # type="list[string]",
144
+ # ),
145
+ AttributeInfo(
146
+ name="time",
147
+ description="The estimated time in minutes required to cook and prepare the recipe",
148
+ type="integer",
149
+ ),
150
+ ]
151
+
152
+ config = MyScaleSettings(
153
+ host=os.environ["MYSCALE_HOST"],
154
+ port=443,
155
+ username=os.environ["MYSCALE_USERNAME"],
156
+ password=os.environ["MYSCALE_PASSWORD"],
157
+ )
158
+ vectorstore = MyScale(base_embeddings_model, config)
159
+
160
+ retriever = ModifiedSelfQueryRetriever.from_llm(
161
+ llm_model,
162
+ vectorstore,
163
+ "Brief summary and key attributes of a recipe, including ingredients, cooking time, occasion, cuisine and diet",
164
+ metadata_field_info,
165
+ verbose=True,
166
+ search_kwargs={"k": 10},
167
+ )
168
+ return retriever
utils/tools.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.pydantic_v1 import BaseModel, Field
3
+ from langchain.tools import tool
4
+ from twilio.rest import Client
5
+
6
+
7
+ class TwilioInputSchema(BaseModel):
8
+ text: str = Field(description="The SMS text body")
9
+ number: str = Field(
10
+ description="The full phone number to send the SMS text to including area code. Example: +12038484755"
11
+ )
12
+
13
+
14
+ @tool("send-text-tool", args_schema=TwilioInputSchema, return_direct=True)
15
+ def send_text_tool(text: str, number: str) -> int:
16
+ """Send an SMS text to a specific number"""
17
+
18
+ print("SENDING text ", text, number)
19
+ tw_client = Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"])
20
+
21
+ message = tw_client.messages.create(
22
+ from_="+18332830457",
23
+ body=text,
24
+ to=number,
25
+ )
26
+ return message.sid