Thomas (Tom) Gardos commited on
Commit
c571e3b
β€’
2 Parent(s): 4bdb9ef 558adb3

Merge pull request #91 from DL4DS/dev_branch

Browse files

Merge to main after dev_branch code restructure

This view is limited to 50 files because it contains too many changes. Β  See raw diff
Files changed (50) hide show
  1. .gitignore +14 -1
  2. Dockerfile +8 -3
  3. Dockerfile.dev +11 -4
  4. README.md +20 -9
  5. {code β†’ apps/ai_tutor}/.chainlit/config.toml +1 -1
  6. apps/ai_tutor/README.md +12 -0
  7. {code β†’ apps/ai_tutor}/app.py +61 -22
  8. code/main.py β†’ apps/ai_tutor/chainlit_app.py +31 -78
  9. {code/modules β†’ apps/ai_tutor}/config/config.yml +5 -5
  10. apps/ai_tutor/config/config_manager.py +189 -0
  11. {code/modules β†’ apps/ai_tutor}/config/constants.py +1 -9
  12. apps/ai_tutor/config/project_config.yml +20 -0
  13. {code/modules β†’ apps/ai_tutor}/config/prompts.py +0 -0
  14. apps/ai_tutor/encrypt_students.py +53 -0
  15. apps/ai_tutor/helpers.py +90 -0
  16. apps/ai_tutor/private/placeholder_students_file.json +5 -0
  17. code/public/avatars/ai_tutor.png β†’ apps/ai_tutor/public/assets/images/avatars/ai-tutor.png +0 -0
  18. code/public/logo_dark.png β†’ apps/ai_tutor/public/assets/images/avatars/ai_tutor.png +0 -0
  19. {code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/acastusphoton-svgrepo-com.svg +0 -0
  20. {code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/adv-screen-recorder-svgrepo-com.svg +0 -0
  21. {code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/alarmy-svgrepo-com.svg +0 -0
  22. {code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/calendar-samsung-17-svgrepo-com.svg +0 -0
  23. apps/ai_tutor/public/files/students_encrypted.json +1 -0
  24. apps/ai_tutor/public/files/test.css +32 -0
  25. code/public/logo_light.png β†’ apps/ai_tutor/public/logo_dark.png +0 -0
  26. apps/ai_tutor/public/logo_light.png +0 -0
  27. {storage β†’ apps/ai_tutor/storage}/data/urls.txt +0 -0
  28. {code β†’ apps/ai_tutor}/templates/cooldown.html +1 -1
  29. {code β†’ apps/ai_tutor}/templates/dashboard.html +1 -1
  30. {code β†’ apps/ai_tutor}/templates/error.html +0 -0
  31. {code β†’ apps/ai_tutor}/templates/error_404.html +0 -0
  32. {code β†’ apps/ai_tutor}/templates/login.html +1 -1
  33. {code β†’ apps/ai_tutor}/templates/logout.html +0 -0
  34. apps/ai_tutor/templates/unauthorized.html +94 -0
  35. apps/chainlit_base/.chainlit/config.toml +120 -0
  36. apps/chainlit_base/chainlit.md +14 -0
  37. {code β†’ apps/chainlit_base}/chainlit_base.py +30 -133
  38. apps/chainlit_base/config/config.yml +60 -0
  39. apps/chainlit_base/config/config_manager.py +174 -0
  40. {code/modules β†’ apps/chainlit_base}/config/project_config.yml +4 -1
  41. apps/chainlit_base/config/prompts.py +97 -0
  42. apps/chainlit_base/public/assets/images/avatars/ai-tutor.png +0 -0
  43. apps/chainlit_base/public/assets/images/avatars/ai_tutor.png +0 -0
  44. apps/chainlit_base/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg +2 -0
  45. apps/chainlit_base/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg +2 -0
  46. apps/chainlit_base/public/assets/images/starter_icons/alarmy-svgrepo-com.svg +2 -0
  47. apps/chainlit_base/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg +36 -0
  48. apps/chainlit_base/public/files/students_encrypted.json +1 -0
  49. {code/public β†’ apps/chainlit_base/public/files}/test.css +8 -5
  50. apps/chainlit_base/public/logo_dark.png +0 -0
.gitignore CHANGED
@@ -169,5 +169,18 @@ code/.chainlit/translations/
169
  storage/logs/*
170
  vectorstores/*
171
 
172
- */.files/*
173
  code/storage/models/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  storage/logs/*
170
  vectorstores/*
171
 
172
+ **/.files/*
173
  code/storage/models/
174
+
175
+ **/translations/en-US.json
176
+ **/translations/zh-CN.json
177
+
178
+
179
+ **/vectorstores/*
180
+
181
+ **/private/students.json
182
+
183
+ **/apps/*/storage/logs/*
184
+ **/apps/*/private/*
185
+
186
+ .idea/
Dockerfile CHANGED
@@ -3,10 +3,12 @@ FROM python:3.11
3
  WORKDIR /code
4
 
5
  COPY ./requirements.txt /code/requirements.txt
 
6
 
7
  RUN pip install --upgrade pip
8
 
9
  RUN pip install --no-cache-dir -r /code/requirements.txt
 
10
 
11
  COPY . /code
12
 
@@ -17,12 +19,15 @@ RUN ls -R /code
17
  RUN chmod -R 777 /code
18
 
19
  # Create a logs directory and set permissions
20
- RUN mkdir /code/logs && chmod 777 /code/logs
21
 
22
  # Create a cache directory within the application's working directory
23
  RUN mkdir /.cache && chmod -R 777 /.cache
24
 
25
- WORKDIR /code/code
 
 
 
26
 
27
  RUN --mount=type=secret,id=HUGGINGFACEHUB_API_TOKEN,mode=0444,required=true
28
  RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444,required=true
@@ -35,4 +40,4 @@ RUN --mount=type=secret,id=LITERAL_API_KEY_LOGGING,mode=0444,required=true
35
  RUN --mount=type=secret,id=CHAINLIT_AUTH_SECRET,mode=0444,required=true
36
 
37
  # Default command to run the application
38
- CMD ["sh", "-c", "python -m modules.vectorstore.store_manager && uvicorn app:app --host 0.0.0.0 --port 7860"]
 
3
  WORKDIR /code
4
 
5
  COPY ./requirements.txt /code/requirements.txt
6
+ COPY ./setup.py /code/setup.py
7
 
8
  RUN pip install --upgrade pip
9
 
10
  RUN pip install --no-cache-dir -r /code/requirements.txt
11
+ RUN pip install -e .
12
 
13
  COPY . /code
14
 
 
19
  RUN chmod -R 777 /code
20
 
21
  # Create a logs directory and set permissions
22
+ RUN mkdir /code/apps/ai_tutor/logs && chmod 777 /code/apps/ai_tutor/logs
23
 
24
  # Create a cache directory within the application's working directory
25
  RUN mkdir /.cache && chmod -R 777 /.cache
26
 
27
+ WORKDIR /code/apps/ai_tutor
28
+
29
+ # Expose the port the app runs on
30
+ EXPOSE 7860
31
 
32
  RUN --mount=type=secret,id=HUGGINGFACEHUB_API_TOKEN,mode=0444,required=true
33
  RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444,required=true
 
40
  RUN --mount=type=secret,id=CHAINLIT_AUTH_SECRET,mode=0444,required=true
41
 
42
  # Default command to run the application
43
+ CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860
Dockerfile.dev CHANGED
@@ -3,13 +3,18 @@ FROM python:3.11
3
  WORKDIR /code
4
 
5
  COPY ./requirements.txt /code/requirements.txt
 
6
 
7
  RUN pip install --upgrade pip
8
 
9
  RUN pip install --no-cache-dir -r /code/requirements.txt
 
10
 
11
  COPY . /code
12
 
 
 
 
13
  # List the contents of the /code directory to verify files are copied correctly
14
  RUN ls -R /code
15
 
@@ -17,15 +22,17 @@ RUN ls -R /code
17
  RUN chmod -R 777 /code
18
 
19
  # Create a logs directory and set permissions
20
- RUN mkdir /code/logs && chmod 777 /code/logs
21
 
22
  # Create a cache directory within the application's working directory
23
  RUN mkdir /.cache && chmod -R 777 /.cache
24
 
25
- WORKDIR /code/code
 
 
26
 
27
  # Expose the port the app runs on
28
- EXPOSE 8000
29
 
30
  # Default command to run the application
31
- CMD ["sh", "-c", "python -m modules.vectorstore.store_manager && chainlit run main.py --host 0.0.0.0 --port 8000"]
 
3
  WORKDIR /code
4
 
5
  COPY ./requirements.txt /code/requirements.txt
6
+ COPY ./setup.py /code/setup.py
7
 
8
  RUN pip install --upgrade pip
9
 
10
  RUN pip install --no-cache-dir -r /code/requirements.txt
11
+ RUN pip install -e .
12
 
13
  COPY . /code
14
 
15
+ # Copy .env file to the application directory
16
+ COPY .env /code/apps/ai_tutor/.env
17
+
18
  # List the contents of the /code directory to verify files are copied correctly
19
  RUN ls -R /code
20
 
 
22
  RUN chmod -R 777 /code
23
 
24
  # Create a logs directory and set permissions
25
+ RUN mkdir /code/apps/ai_tutor/logs && chmod 777 /code/apps/ai_tutor/logs
26
 
27
  # Create a cache directory within the application's working directory
28
  RUN mkdir /.cache && chmod -R 777 /.cache
29
 
30
+ WORKDIR /code/apps/ai_tutor
31
+
32
+ RUN ls -R /code
33
 
34
  # Expose the port the app runs on
35
+ EXPOSE 7860
36
 
37
  # Default command to run the application
38
+ CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860
README.md CHANGED
@@ -9,6 +9,12 @@ app_port: 7860
9
  ---
10
  # DL4DS Tutor πŸƒ
11
 
 
 
 
 
 
 
12
  Check out the configuration reference at [Hugging Face Spaces Config Reference](https://huggingface.co/docs/hub/spaces-config-reference).
13
 
14
  You can find a "production" implementation of the Tutor running live at [DL4DS Tutor](https://dl4ds-dl4ds-tutor.hf.space/) from the
@@ -31,26 +37,31 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more
31
  git clone https://github.com/DL4DS/dl4ds_tutor
32
  ```
33
 
34
- 2. **Put your data under the `storage/data` directory**
 
 
 
 
 
 
35
  - Add URLs in the `urls.txt` file.
36
- - Add other PDF files in the `storage/data` directory.
37
 
38
  3. **To test Data Loading (Optional)**
39
  ```bash
40
- cd code
41
- python -m modules.dataloader.data_loader --links "your_pdf_link"
42
  ```
43
 
44
  4. **Create the Vector Database**
45
  ```bash
46
- cd code
47
- python -m modules.vectorstore.store_manager
48
  ```
49
- - Note: You need to run the above command when you add new data to the `storage/data` directory, or if the `storage/data/urls.txt` file is updated.
50
 
51
  6. **Run the FastAPI App**
52
  ```bash
53
- cd code
54
  uvicorn app:app --port 7860
55
  ```
56
 
@@ -65,7 +76,7 @@ The HuggingFace Space is built using the `Dockerfile` in the repository. To run
65
 
66
  ```bash
67
  docker build --tag dev -f Dockerfile.dev .
68
- docker run -it --rm -p 8000:8000 dev
69
  ```
70
 
71
  ## Contributing
 
9
  ---
10
  # DL4DS Tutor πŸƒ
11
 
12
+ ![Build Status](https://github.com/DL4DS/dl4ds_tutor/actions/workflows/push_to_hf_space.yml/badge.svg)
13
+ ![License](https://img.shields.io/github/license/DL4DS/dl4ds_tutor)
14
+ ![GitHub stars](https://img.shields.io/github/stars/DL4DS/dl4ds_tutor)
15
+ ![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)
16
+
17
+
18
  Check out the configuration reference at [Hugging Face Spaces Config Reference](https://huggingface.co/docs/hub/spaces-config-reference).
19
 
20
  You can find a "production" implementation of the Tutor running live at [DL4DS Tutor](https://dl4ds-dl4ds-tutor.hf.space/) from the
 
37
  git clone https://github.com/DL4DS/dl4ds_tutor
38
  ```
39
 
40
+ 2. Create your app in the apps folder. (An example is the `apps/ai_tutor` app)
41
+ ```
42
+ cd apps
43
+ mkdir your_app
44
+ ```
45
+
46
+ 2. **Put your data under the `apps/your_app/storage/data` directory**
47
  - Add URLs in the `urls.txt` file.
48
+ - Add other PDF files in the `apps/your_app/storage/data` directory.
49
 
50
  3. **To test Data Loading (Optional)**
51
  ```bash
52
+ cd apps/your_app
53
+ python -m modules.dataloader.data_loader --links "your_pdf_link" --config_file config/config.yml --project_config_file config/project_config.yml
54
  ```
55
 
56
  4. **Create the Vector Database**
57
  ```bash
58
+ cd apps/your_app
59
+ python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml
60
  ```
 
61
 
62
  6. **Run the FastAPI App**
63
  ```bash
64
+ cd apps/your_app
65
  uvicorn app:app --port 7860
66
  ```
67
 
 
76
 
77
  ```bash
78
  docker build --tag dev -f Dockerfile.dev .
79
+ docker run -it --rm -p 7860:7860 dev
80
  ```
81
 
82
  ## Contributing
{code β†’ apps/ai_tutor}/.chainlit/config.toml RENAMED
@@ -69,7 +69,7 @@ github = "https://github.com/DL4DS/dl4ds_tutor"
69
 
70
  # Specify a CSS file that can be used to customize the user interface.
71
  # The CSS file can be served from the public directory or via an external link.
72
- custom_css = "/public/test.css"
73
 
74
  # Specify a Javascript file that can be used to customize the user interface.
75
  # The Javascript file can be served from the public directory.
 
69
 
70
  # Specify a CSS file that can be used to customize the user interface.
71
  # The CSS file can be served from the public directory or via an external link.
72
+ custom_css = "/public/files/test.css"
73
 
74
  # Specify a Javascript file that can be used to customize the user interface.
75
  # The Javascript file can be served from the public directory.
apps/ai_tutor/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # WIP
2
+
3
+
4
+ ## Run the encrypt_students script
5
+
6
+ - If you don't want the emails to be public, run this script to encrypt the emails of the students.
7
+ - This will create a new file in the public/files/ directory.
8
+ - Place your file with the students' emails in the private/ directory (do not commit this file to the repository).
9
+
10
+ ```bash
11
+ python encrypt_students.py --students-file private/students.json --encrypted-students-file public/files/students_encrypted.json
12
+ ```
{code β†’ apps/ai_tutor}/app.py RENAMED
@@ -8,24 +8,33 @@ from chainlit.utils import mount_chainlit
8
  import secrets
9
  import json
10
  import base64
11
- from modules.config.constants import (
12
  OAUTH_GOOGLE_CLIENT_ID,
13
  OAUTH_GOOGLE_CLIENT_SECRET,
14
  CHAINLIT_URL,
15
- GITHUB_REPO,
16
- DOCS_WEBSITE,
17
- ALL_TIME_TOKENS_ALLOCATED,
18
- TOKENS_LEFT,
19
  )
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.staticfiles import StaticFiles
22
- from modules.chat_processor.helpers import (
23
- get_user_details,
24
  get_time,
25
  reset_tokens_for_user,
26
  check_user_cooldown,
27
- update_user_info,
28
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  GOOGLE_CLIENT_ID = OAUTH_GOOGLE_CLIENT_ID
31
  GOOGLE_CLIENT_SECRET = OAUTH_GOOGLE_CLIENT_SECRET
@@ -46,13 +55,8 @@ session_store = {}
46
  CHAINLIT_PATH = "/chainlit_tutor"
47
 
48
  # only admin is given any additional permissions for now -- no limits on tokens
49
- USER_ROLES = {
50
- "[email protected]": ["instructor", "bu"],
51
- "[email protected]": ["admin", "instructor", "bu"],
52
- "[email protected]": ["instructor", "bu"],
53
- "[email protected]": ["guest"],
54
- # Add more users and roles as needed
55
- }
56
 
57
  # Create a Google OAuth flow
58
  flow = Flow.from_client_config(
@@ -80,7 +84,20 @@ flow = Flow.from_client_config(
80
 
81
 
82
  def get_user_role(username: str):
83
- return USER_ROLES.get(username, ["guest"]) # Default to "guest" role
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  async def get_user_info_from_cookie(request: Request):
@@ -146,6 +163,11 @@ async def login_page(request: Request):
146
  # return response
147
 
148
 
 
 
 
 
 
149
  @app.get("/login/google")
150
  async def login_google(request: Request):
151
  # Clear any existing session cookies to avoid conflicts with guest sessions
@@ -176,6 +198,9 @@ async def auth_google(request: Request):
176
  profile_image = user_info.get("picture", "")
177
  role = get_user_role(email)
178
 
 
 
 
179
  session_token = secrets.token_hex(16)
180
  session_store[session_token] = {
181
  "email": email,
@@ -210,7 +235,7 @@ async def cooldown(request: Request):
210
  user_details = await get_user_details(user_info["email"])
211
  current_datetime = get_time()
212
  cooldown, cooldown_end_time = await check_user_cooldown(
213
- user_details, current_datetime
214
  )
215
  print(f"User in cooldown: {cooldown}")
216
  print(f"Cooldown end time: {cooldown_end_time}")
@@ -228,7 +253,11 @@ async def cooldown(request: Request):
228
  else:
229
  user_details.metadata["in_cooldown"] = False
230
  await update_user_info(user_details)
231
- await reset_tokens_for_user(user_details)
 
 
 
 
232
  return RedirectResponse("/post-signin")
233
 
234
 
@@ -256,13 +285,19 @@ async def post_signin(request: Request):
256
  if "last_message_time" in user_details.metadata and "admin" not in get_user_role(
257
  user_info["email"]
258
  ):
259
- cooldown, _ = await check_user_cooldown(user_details, current_datetime)
 
 
260
  if cooldown:
261
  user_details.metadata["in_cooldown"] = True
262
  return RedirectResponse("/cooldown")
263
  else:
264
  user_details.metadata["in_cooldown"] = False
265
- await reset_tokens_for_user(user_details)
 
 
 
 
266
 
267
  if user_info:
268
  username = user_info["email"]
@@ -335,7 +370,11 @@ async def get_tokens_left(request: Request):
335
  try:
336
  user_info = await get_user_info_from_cookie(request)
337
  user_details = await get_user_details(user_info["email"])
338
- await reset_tokens_for_user(user_details)
 
 
 
 
339
  tokens_left = user_details.metadata["tokens_left"]
340
  return {"tokens_left": tokens_left}
341
  except Exception as e:
@@ -343,7 +382,7 @@ async def get_tokens_left(request: Request):
343
  return {"tokens_left": 0}
344
 
345
 
346
- mount_chainlit(app=app, target="main.py", path=CHAINLIT_PATH)
347
 
348
  if __name__ == "__main__":
349
  import uvicorn
 
8
  import secrets
9
  import json
10
  import base64
11
+ from config.constants import (
12
  OAUTH_GOOGLE_CLIENT_ID,
13
  OAUTH_GOOGLE_CLIENT_SECRET,
14
  CHAINLIT_URL,
15
+ EMAIL_ENCRYPTION_KEY,
 
 
 
16
  )
17
  from fastapi.middleware.cors import CORSMiddleware
18
  from fastapi.staticfiles import StaticFiles
19
+ from helpers import (
 
20
  get_time,
21
  reset_tokens_for_user,
22
  check_user_cooldown,
 
23
  )
24
+ from modules.chat_processor.helpers import get_user_details, update_user_info
25
+ from config.config_manager import config_manager
26
+ import hashlib
27
+
28
+ # set config
29
+ config = config_manager.get_config().dict()
30
+
31
+ # set constants
32
+ GITHUB_REPO = config["misc"]["github_repo"]
33
+ DOCS_WEBSITE = config["misc"]["docs_website"]
34
+ ALL_TIME_TOKENS_ALLOCATED = config["token_config"]["all_time_tokens_allocated"]
35
+ TOKENS_LEFT = config["token_config"]["tokens_left"]
36
+ COOLDOWN_TIME = config["token_config"]["cooldown_time"]
37
+ REGEN_TIME = config["token_config"]["regen_time"]
38
 
39
  GOOGLE_CLIENT_ID = OAUTH_GOOGLE_CLIENT_ID
40
  GOOGLE_CLIENT_SECRET = OAUTH_GOOGLE_CLIENT_SECRET
 
55
  CHAINLIT_PATH = "/chainlit_tutor"
56
 
57
  # only admin is given any additional permissions for now -- no limits on tokens
58
+ with open("public/files/students_encrypted.json", "r") as file:
59
+ USER_ROLES = json.load(file)
 
 
 
 
 
60
 
61
  # Create a Google OAuth flow
62
  flow = Flow.from_client_config(
 
84
 
85
 
86
  def get_user_role(username: str):
87
+
88
+ # Function to deterministically hash emails
89
+ def deterministic_hash(email, salt):
90
+ return hashlib.pbkdf2_hmac("sha256", email.encode(), salt, 100000).hex()
91
+
92
+ # encrypt email (#FIXME: this is not the best way to do this, not really encryption, more like a hash)
93
+ encryption_salt = EMAIL_ENCRYPTION_KEY.encode()
94
+ encrypted_email = deterministic_hash(username, encryption_salt)
95
+ role = USER_ROLES.get(encrypted_email, ["guest"])
96
+
97
+ if "guest" in role:
98
+ return "unauthorized"
99
+
100
+ return role
101
 
102
 
103
  async def get_user_info_from_cookie(request: Request):
 
163
  # return response
164
 
165
 
166
+ @app.get("/unauthorized", response_class=HTMLResponse)
167
+ async def unauthorized(request: Request):
168
+ return templates.TemplateResponse("unauthorized.html", {"request": request})
169
+
170
+
171
  @app.get("/login/google")
172
  async def login_google(request: Request):
173
  # Clear any existing session cookies to avoid conflicts with guest sessions
 
198
  profile_image = user_info.get("picture", "")
199
  role = get_user_role(email)
200
 
201
+ if role == "unauthorized":
202
+ return RedirectResponse("/unauthorized")
203
+
204
  session_token = secrets.token_hex(16)
205
  session_store[session_token] = {
206
  "email": email,
 
235
  user_details = await get_user_details(user_info["email"])
236
  current_datetime = get_time()
237
  cooldown, cooldown_end_time = await check_user_cooldown(
238
+ user_details, current_datetime, COOLDOWN_TIME, TOKENS_LEFT, REGEN_TIME
239
  )
240
  print(f"User in cooldown: {cooldown}")
241
  print(f"Cooldown end time: {cooldown_end_time}")
 
253
  else:
254
  user_details.metadata["in_cooldown"] = False
255
  await update_user_info(user_details)
256
+ await reset_tokens_for_user(
257
+ user_details,
258
+ config["token_config"]["tokens_left"],
259
+ config["token_config"]["regen_time"],
260
+ )
261
  return RedirectResponse("/post-signin")
262
 
263
 
 
285
  if "last_message_time" in user_details.metadata and "admin" not in get_user_role(
286
  user_info["email"]
287
  ):
288
+ cooldown, _ = await check_user_cooldown(
289
+ user_details, current_datetime, COOLDOWN_TIME, TOKENS_LEFT, REGEN_TIME
290
+ )
291
  if cooldown:
292
  user_details.metadata["in_cooldown"] = True
293
  return RedirectResponse("/cooldown")
294
  else:
295
  user_details.metadata["in_cooldown"] = False
296
+ await reset_tokens_for_user(
297
+ user_details,
298
+ config["token_config"]["tokens_left"],
299
+ config["token_config"]["regen_time"],
300
+ )
301
 
302
  if user_info:
303
  username = user_info["email"]
 
370
  try:
371
  user_info = await get_user_info_from_cookie(request)
372
  user_details = await get_user_details(user_info["email"])
373
+ await reset_tokens_for_user(
374
+ user_details,
375
+ config["token_config"]["tokens_left"],
376
+ config["token_config"]["regen_time"],
377
+ )
378
  tokens_left = user_details.metadata["tokens_left"]
379
  return {"tokens_left": tokens_left}
380
  except Exception as e:
 
382
  return {"tokens_left": 0}
383
 
384
 
385
+ mount_chainlit(app=app, target="chainlit_app.py", path=CHAINLIT_PATH)
386
 
387
  if __name__ == "__main__":
388
  import uvicorn
code/main.py β†’ apps/ai_tutor/chainlit_app.py RENAMED
@@ -1,12 +1,11 @@
1
  import chainlit.data as cl_data
2
  import asyncio
3
- from modules.config.constants import (
4
  LITERAL_API_KEY_LOGGING,
5
  LITERAL_API_URL,
6
  )
7
  from modules.chat_processor.literal_ai import CustomLiteralDataLayer
8
  import json
9
- import yaml
10
  from typing import Any, Dict, no_type_check
11
  import chainlit as cl
12
  from modules.chat.llm_tutor import LLMTutor
@@ -14,22 +13,24 @@ from modules.chat.helpers import (
14
  get_sources,
15
  get_history_chat_resume,
16
  get_history_setup_llm,
17
- get_last_config,
18
  )
19
  from modules.chat_processor.helpers import (
20
  update_user_info,
21
- get_time,
 
 
22
  check_user_cooldown,
23
  reset_tokens_for_user,
24
- get_user_details,
25
  )
 
26
  import copy
27
  from typing import Optional
28
  from chainlit.types import ThreadDict
29
- import time
30
  import base64
31
  from langchain_community.callbacks import get_openai_callback
32
  from datetime import datetime, timezone
 
33
 
34
  USER_TIMEOUT = 60_000
35
  SYSTEM = "System"
@@ -38,8 +39,8 @@ AGENT = "Agent"
38
  YOU = "User"
39
  ERROR = "Error"
40
 
41
- with open("modules/config/config.yml", "r") as f:
42
- config = yaml.safe_load(f)
43
 
44
 
45
  async def setup_data_layer():
@@ -81,13 +82,6 @@ class Chatbot:
81
  """
82
  self.config = config
83
 
84
- async def _load_config(self):
85
- """
86
- Load the configuration from a YAML file.
87
- """
88
- with open("modules/config/config.yml", "r") as f:
89
- return yaml.safe_load(f)
90
-
91
  @no_type_check
92
  async def setup_llm(self):
93
  """
@@ -95,7 +89,6 @@ class Chatbot:
95
 
96
  #TODO: Clean this up.
97
  """
98
- start_time = time.time()
99
 
100
  llm_settings = cl.user_session.get("llm_settings", {})
101
  (
@@ -143,8 +136,6 @@ class Chatbot:
143
  cl.user_session.set("chain", self.chain)
144
  cl.user_session.set("llm_tutor", self.llm_tutor)
145
 
146
- print("Time taken to setup LLM: ", time.time() - start_time)
147
-
148
  @no_type_check
149
  async def update_llm(self, new_settings: Dict[str, Any]):
150
  """
@@ -227,32 +218,9 @@ class Chatbot:
227
  """
228
  Inform the user about the updated LLM settings and display them as a message.
229
  """
230
- llm_settings: Dict[str, Any] = cl.user_session.get("llm_settings", {})
231
- llm_tutor = cl.user_session.get("llm_tutor")
232
- settings_dict = {
233
- "model": llm_settings.get("chat_model"),
234
- "retriever": llm_settings.get("retriever_method"),
235
- "memory_window": llm_settings.get("memory_window"),
236
- "num_docs_in_db": (
237
- len(llm_tutor.vector_db)
238
- if llm_tutor and hasattr(llm_tutor, "vector_db")
239
- else 0
240
- ),
241
- "view_sources": llm_settings.get("view_sources"),
242
- "follow_up_questions": llm_settings.get("follow_up_questions"),
243
- }
244
- print("Settings Dict: ", settings_dict)
245
  await cl.Message(
246
  author=SYSTEM,
247
  content="LLM settings have been updated. You can continue with your Query!",
248
- # elements=[
249
- # cl.Text(
250
- # name="settings",
251
- # display="side",
252
- # content=json.dumps(settings_dict, indent=4),
253
- # language="json",
254
- # ),
255
- # ],
256
  ).send()
257
 
258
  async def set_starters(self):
@@ -271,24 +239,24 @@ class Chatbot:
271
  print(e)
272
  return [
273
  cl.Starter(
274
- label="recording on CNNs?",
275
  message="Where can I find the recording for the lecture on Transformers?",
276
- icon="/public/adv-screen-recorder-svgrepo-com.svg",
277
  ),
278
  cl.Starter(
279
- label="where's the slides?",
280
  message="When are the lectures? I can't find the schedule.",
281
- icon="/public/alarmy-svgrepo-com.svg",
282
  ),
283
  cl.Starter(
284
  label="Due Date?",
285
  message="When is the final project due?",
286
- icon="/public/calendar-samsung-17-svgrepo-com.svg",
287
  ),
288
  cl.Starter(
289
  label="Explain backprop.",
290
  message="I didn't understand the math behind backprop, could you explain it?",
291
- icon="/public/acastusphoton-svgrepo-com.svg",
292
  ),
293
  ]
294
 
@@ -305,18 +273,12 @@ class Chatbot:
305
  rename_dict = {"Chatbot": LLM}
306
  return rename_dict.get(orig_author, orig_author)
307
 
308
- async def start(self, config=None):
309
  """
310
  Start the chatbot, initialize settings widgets,
311
  and display and load previous conversation if chat logging is enabled.
312
  """
313
 
314
- start_time = time.time()
315
-
316
- self.config = (
317
- await self._load_config() if config is None else config
318
- ) # Reload the configuration on chat resume
319
-
320
  await self.make_llm_settings_widgets(self.config) # Reload the settings widgets
321
 
322
  user = cl.user_session.get("user")
@@ -344,8 +306,6 @@ class Chatbot:
344
  cl.user_session.set("llm_tutor", self.llm_tutor)
345
  cl.user_session.set("chain", self.chain)
346
 
347
- print("Time taken to start LLM: ", time.time() - start_time)
348
-
349
  async def stream_response(self, response):
350
  """
351
  Stream the response from the LLM.
@@ -376,8 +336,6 @@ class Chatbot:
376
  message: The incoming chat message.
377
  """
378
 
379
- start_time = time.time()
380
-
381
  chain = cl.user_session.get("chain")
382
  token_count = 0 # initialize token count
383
  if not chain:
@@ -386,19 +344,25 @@ class Chatbot:
386
 
387
  # update user info with last message time
388
  user = cl.user_session.get("user")
389
- await reset_tokens_for_user(user)
 
 
 
 
390
  updated_user = await get_user_details(user.identifier)
391
  user.metadata = updated_user.metadata
392
  cl.user_session.set("user", user)
393
 
394
- print("\n\n User Tokens Left: ", user.metadata["tokens_left"])
395
-
396
  # see if user has token credits left
397
  # if not, return message saying they have run out of tokens
398
  if user.metadata["tokens_left"] <= 0 and "admin" not in user.metadata["role"]:
399
  current_datetime = get_time()
400
  cooldown, cooldown_end_time = await check_user_cooldown(
401
- user, current_datetime
 
 
 
 
402
  )
403
  if cooldown:
404
  # get time left in cooldown
@@ -479,12 +443,9 @@ class Chatbot:
479
  )
480
  answer_with_sources = answer_with_sources.replace("$$", "$")
481
 
482
- print("Time taken to process the message: ", time.time() - start_time)
483
-
484
  actions = []
485
 
486
  if self.config["llm_params"]["generate_follow_up"]:
487
- start_time = time.time()
488
  cb_follow_up = cl.AsyncLangchainCallbackHandler()
489
  config = {
490
  "callbacks": (
@@ -514,8 +475,6 @@ class Chatbot:
514
  )
515
  )
516
 
517
- print("Time taken to generate questions: ", time.time() - start_time)
518
-
519
  # # update user info with token count
520
  tokens_left = await update_user_from_chainlit(user, token_count)
521
 
@@ -530,25 +489,23 @@ class Chatbot:
530
  elements=source_elements,
531
  author=LLM,
532
  actions=actions,
533
- metadata=self.config,
534
  ).send()
535
 
536
  async def on_chat_resume(self, thread: ThreadDict):
537
- thread_config = None
538
  steps = thread["steps"]
539
  k = self.config["llm_params"][
540
  "memory_window"
541
  ] # on resume, alwyas use the default memory window
542
  conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
543
- thread_config = get_last_config(
544
- steps
545
- ) # TODO: Returns None for now - which causes config to be reloaded with default values
546
  cl.user_session.set("memory", conversation_list)
547
- await self.start(config=thread_config)
548
 
549
  @cl.header_auth_callback
550
  def header_auth_callback(headers: dict) -> Optional[cl.User]:
551
- print("\n\n\nI am here\n\n\n")
552
  # try: # TODO: Add try-except block after testing
553
  # TODO: Implement to get the user information from the headers (not the cookie)
554
  cookie = headers.get("cookie") # gets back a str
@@ -564,10 +521,6 @@ class Chatbot:
564
  ).decode()
565
  decoded_user_info = json.loads(decoded_user_info)
566
 
567
- print(
568
- f"\n\n USER ROLE: {decoded_user_info['literalai_info']['metadata']['role']} \n\n"
569
- )
570
-
571
  return cl.User(
572
  id=decoded_user_info["literalai_info"]["id"],
573
  identifier=decoded_user_info["literalai_info"]["identifier"],
 
1
  import chainlit.data as cl_data
2
  import asyncio
3
+ from config.constants import (
4
  LITERAL_API_KEY_LOGGING,
5
  LITERAL_API_URL,
6
  )
7
  from modules.chat_processor.literal_ai import CustomLiteralDataLayer
8
  import json
 
9
  from typing import Any, Dict, no_type_check
10
  import chainlit as cl
11
  from modules.chat.llm_tutor import LLMTutor
 
13
  get_sources,
14
  get_history_chat_resume,
15
  get_history_setup_llm,
16
+ # get_last_config,
17
  )
18
  from modules.chat_processor.helpers import (
19
  update_user_info,
20
+ get_user_details,
21
+ )
22
+ from helpers import (
23
  check_user_cooldown,
24
  reset_tokens_for_user,
 
25
  )
26
+ from helpers import get_time
27
  import copy
28
  from typing import Optional
29
  from chainlit.types import ThreadDict
 
30
  import base64
31
  from langchain_community.callbacks import get_openai_callback
32
  from datetime import datetime, timezone
33
+ from config.config_manager import config_manager
34
 
35
  USER_TIMEOUT = 60_000
36
  SYSTEM = "System"
 
39
  YOU = "User"
40
  ERROR = "Error"
41
 
42
+ # set config
43
+ config = config_manager.get_config().dict()
44
 
45
 
46
  async def setup_data_layer():
 
82
  """
83
  self.config = config
84
 
 
 
 
 
 
 
 
85
  @no_type_check
86
  async def setup_llm(self):
87
  """
 
89
 
90
  #TODO: Clean this up.
91
  """
 
92
 
93
  llm_settings = cl.user_session.get("llm_settings", {})
94
  (
 
136
  cl.user_session.set("chain", self.chain)
137
  cl.user_session.set("llm_tutor", self.llm_tutor)
138
 
 
 
139
  @no_type_check
140
  async def update_llm(self, new_settings: Dict[str, Any]):
141
  """
 
218
  """
219
  Inform the user about the updated LLM settings and display them as a message.
220
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  await cl.Message(
222
  author=SYSTEM,
223
  content="LLM settings have been updated. You can continue with your Query!",
 
 
 
 
 
 
 
 
224
  ).send()
225
 
226
  async def set_starters(self):
 
239
  print(e)
240
  return [
241
  cl.Starter(
242
+ label="recording on Transformers?",
243
  message="Where can I find the recording for the lecture on Transformers?",
244
+ icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
245
  ),
246
  cl.Starter(
247
+ label="where's the schedule?",
248
  message="When are the lectures? I can't find the schedule.",
249
+ icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
250
  ),
251
  cl.Starter(
252
  label="Due Date?",
253
  message="When is the final project due?",
254
+ icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
255
  ),
256
  cl.Starter(
257
  label="Explain backprop.",
258
  message="I didn't understand the math behind backprop, could you explain it?",
259
+ icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
260
  ),
261
  ]
262
 
 
273
  rename_dict = {"Chatbot": LLM}
274
  return rename_dict.get(orig_author, orig_author)
275
 
276
+ async def start(self):
277
  """
278
  Start the chatbot, initialize settings widgets,
279
  and display and load previous conversation if chat logging is enabled.
280
  """
281
 
 
 
 
 
 
 
282
  await self.make_llm_settings_widgets(self.config) # Reload the settings widgets
283
 
284
  user = cl.user_session.get("user")
 
306
  cl.user_session.set("llm_tutor", self.llm_tutor)
307
  cl.user_session.set("chain", self.chain)
308
 
 
 
309
  async def stream_response(self, response):
310
  """
311
  Stream the response from the LLM.
 
336
  message: The incoming chat message.
337
  """
338
 
 
 
339
  chain = cl.user_session.get("chain")
340
  token_count = 0 # initialize token count
341
  if not chain:
 
344
 
345
  # update user info with last message time
346
  user = cl.user_session.get("user")
347
+ await reset_tokens_for_user(
348
+ user,
349
+ self.config["token_config"]["tokens_left"],
350
+ self.config["token_config"]["regen_time"],
351
+ )
352
  updated_user = await get_user_details(user.identifier)
353
  user.metadata = updated_user.metadata
354
  cl.user_session.set("user", user)
355
 
 
 
356
  # see if user has token credits left
357
  # if not, return message saying they have run out of tokens
358
  if user.metadata["tokens_left"] <= 0 and "admin" not in user.metadata["role"]:
359
  current_datetime = get_time()
360
  cooldown, cooldown_end_time = await check_user_cooldown(
361
+ user,
362
+ current_datetime,
363
+ self.config["token_config"]["cooldown_time"],
364
+ self.config["token_config"]["tokens_left"],
365
+ self.config["token_config"]["regen_time"],
366
  )
367
  if cooldown:
368
  # get time left in cooldown
 
443
  )
444
  answer_with_sources = answer_with_sources.replace("$$", "$")
445
 
 
 
446
  actions = []
447
 
448
  if self.config["llm_params"]["generate_follow_up"]:
 
449
  cb_follow_up = cl.AsyncLangchainCallbackHandler()
450
  config = {
451
  "callbacks": (
 
475
  )
476
  )
477
 
 
 
478
  # # update user info with token count
479
  tokens_left = await update_user_from_chainlit(user, token_count)
480
 
 
489
  elements=source_elements,
490
  author=LLM,
491
  actions=actions,
 
492
  ).send()
493
 
494
  async def on_chat_resume(self, thread: ThreadDict):
495
+ # thread_config = None
496
  steps = thread["steps"]
497
  k = self.config["llm_params"][
498
  "memory_window"
499
  ] # on resume, alwyas use the default memory window
500
  conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
501
+ # thread_config = get_last_config(
502
+ # steps
503
+ # ) # TODO: Returns None for now - which causes config to be reloaded with default values
504
  cl.user_session.set("memory", conversation_list)
505
+ await self.start()
506
 
507
  @cl.header_auth_callback
508
  def header_auth_callback(headers: dict) -> Optional[cl.User]:
 
509
  # try: # TODO: Add try-except block after testing
510
  # TODO: Implement to get the user information from the headers (not the cookie)
511
  cookie = headers.get("cookie") # gets back a str
 
521
  ).decode()
522
  decoded_user_info = json.loads(decoded_user_info)
523
 
 
 
 
 
524
  return cl.User(
525
  id=decoded_user_info["literalai_info"]["id"],
526
  identifier=decoded_user_info["literalai_info"]["identifier"],
{code/modules β†’ apps/ai_tutor}/config/config.yml RENAMED
@@ -1,15 +1,15 @@
1
- log_dir: '../storage/logs' # str
2
- log_chunk_dir: '../storage/logs/chunks' # str
3
  device: 'cpu' # str [cuda, cpu]
4
 
5
  vectorstore:
6
  load_from_HF: True # bool
7
  reparse_files: True # bool
8
- data_path: '../storage/data' # str
9
- url_file_path: '../storage/data/urls.txt' # str
10
  expand_urls: True # bool
11
  db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
- db_path : '../vectorstores' # str
13
  model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
14
  search_top_k : 3 # int
15
  score_threshold : 0.2 # float
 
1
+ log_dir: 'storage/logs' # str
2
+ log_chunk_dir: 'storage/logs/chunks' # str
3
  device: 'cpu' # str [cuda, cpu]
4
 
5
  vectorstore:
6
  load_from_HF: True # bool
7
  reparse_files: True # bool
8
+ data_path: 'storage/data' # str
9
+ url_file_path: 'storage/data/urls.txt' # str
10
  expand_urls: True # bool
11
  db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
+ db_path : 'vectorstores' # str
13
  model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
14
  search_top_k : 3 # int
15
  score_threshold : 0.2 # float
apps/ai_tutor/config/config_manager.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, conint, confloat, HttpUrl
2
+ from typing import Optional, List
3
+ import yaml
4
+
5
+
6
+ class FaissParams(BaseModel):
7
+ index_path: str = "vectorstores/faiss.index"
8
+ index_type: str = "Flat" # Options: [Flat, HNSW, IVF]
9
+ index_dimension: conint(gt=0) = 384
10
+ index_nlist: conint(gt=0) = 100
11
+ index_nprobe: conint(gt=0) = 10
12
+
13
+
14
+ class ColbertParams(BaseModel):
15
+ index_name: str = "new_idx"
16
+
17
+
18
+ class VectorStoreConfig(BaseModel):
19
+ load_from_HF: bool = True
20
+ reparse_files: bool = True
21
+ data_path: str = "storage/data"
22
+ url_file_path: str = "storage/data/urls.txt"
23
+ expand_urls: bool = True
24
+ db_option: str = "RAGatouille" # Options: [FAISS, Chroma, RAGatouille, RAPTOR]
25
+ db_path: str = "vectorstores"
26
+ model: str = (
27
+ # Options: [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
28
+ "sentence-transformers/all-MiniLM-L6-v2"
29
+ )
30
+ search_top_k: conint(gt=0) = 3
31
+ score_threshold: confloat(ge=0.0, le=1.0) = 0.2
32
+
33
+ faiss_params: Optional[FaissParams] = None
34
+ colbert_params: Optional[ColbertParams] = None
35
+
36
+
37
+ class OpenAIParams(BaseModel):
38
+ temperature: confloat(ge=0.0, le=1.0) = 0.7
39
+
40
+
41
+ class LocalLLMParams(BaseModel):
42
+ temperature: confloat(ge=0.0, le=1.0) = 0.7
43
+ repo_id: str = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
44
+ filename: str = (
45
+ "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
46
+ )
47
+ model_path: str = (
48
+ "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
49
+ )
50
+
51
+
52
+ class LLMParams(BaseModel):
53
+ llm_arch: str = "langchain" # Options: [langchain]
54
+ use_history: bool = True
55
+ generate_follow_up: bool = False
56
+ memory_window: conint(ge=1) = 3
57
+ llm_style: str = "Normal" # Options: [Normal, ELI5]
58
+ llm_loader: str = (
59
+ "gpt-4o-mini" # Options: [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
60
+ )
61
+ openai_params: Optional[OpenAIParams] = None
62
+ local_llm_params: Optional[LocalLLMParams] = None
63
+ stream: bool = False
64
+ pdf_reader: str = "gpt" # Options: [llama, pymupdf, gpt]
65
+
66
+
67
+ class ChatLoggingConfig(BaseModel):
68
+ log_chat: bool = True
69
+ platform: str = "literalai"
70
+ callbacks: bool = True
71
+
72
+
73
+ class SplitterOptions(BaseModel):
74
+ use_splitter: bool = True
75
+ split_by_token: bool = True
76
+ remove_leftover_delimiters: bool = True
77
+ remove_chunks: bool = False
78
+ chunking_mode: str = "semantic" # Options: [fixed, semantic]
79
+ chunk_size: conint(gt=0) = 300
80
+ chunk_overlap: conint(ge=0) = 30
81
+ chunk_separators: List[str] = ["\n\n", "\n", " ", ""]
82
+ front_chunks_to_remove: Optional[conint(ge=0)] = None
83
+ last_chunks_to_remove: Optional[conint(ge=0)] = None
84
+ delimiters_to_remove: List[str] = ["\t", "\n", " ", " "]
85
+
86
+
87
+ class RetrieverConfig(BaseModel):
88
+ retriever_hf_paths: dict[str, str] = {"RAGatouille": "XThomasBU/Colbert_Index"}
89
+
90
+
91
+ class MetadataConfig(BaseModel):
92
+ metadata_links: List[HttpUrl] = [
93
+ "https://dl4ds.github.io/sp2024/lectures/",
94
+ "https://dl4ds.github.io/sp2024/schedule/",
95
+ ]
96
+ slide_base_link: HttpUrl = "https://dl4ds.github.io"
97
+
98
+
99
+ class TokenConfig(BaseModel):
100
+ cooldown_time: conint(gt=0) = 60
101
+ regen_time: conint(gt=0) = 180
102
+ tokens_left: conint(gt=0) = 2000
103
+ all_time_tokens_allocated: conint(gt=0) = 1000000
104
+
105
+
106
+ class MiscConfig(BaseModel):
107
+ github_repo: HttpUrl = "https://github.com/DL4DS/dl4ds_tutor"
108
+ docs_website: HttpUrl = "https://dl4ds.github.io/dl4ds_tutor/"
109
+
110
+
111
+ class APIConfig(BaseModel):
112
+ timeout: conint(gt=0) = 60
113
+
114
+
115
+ class Config(BaseModel):
116
+ log_dir: str = "storage/logs"
117
+ log_chunk_dir: str = "storage/logs/chunks"
118
+ device: str = "cpu" # Options: ['cuda', 'cpu']
119
+
120
+ vectorstore: VectorStoreConfig
121
+ llm_params: LLMParams
122
+ chat_logging: ChatLoggingConfig
123
+ splitter_options: SplitterOptions
124
+ retriever: RetrieverConfig
125
+ metadata: MetadataConfig
126
+ token_config: TokenConfig
127
+ misc: MiscConfig
128
+ api_config: APIConfig
129
+
130
+
131
+ class ConfigManager:
132
+ def __init__(self, config_path: str, project_config_path: str):
133
+ self.config_path = config_path
134
+ self.project_config_path = project_config_path
135
+ self.config = self.load_config()
136
+ self.validate_config()
137
+
138
+ def load_config(self) -> Config:
139
+ with open(self.config_path, "r") as f:
140
+ config_data = yaml.safe_load(f)
141
+
142
+ with open(self.project_config_path, "r") as f:
143
+ project_config_data = yaml.safe_load(f)
144
+
145
+ # Merge the two configurations
146
+ merged_config = {**config_data, **project_config_data}
147
+
148
+ return Config(**merged_config)
149
+
150
+ def get_config(self) -> Config:
151
+ return ConfigWrapper(self.config)
152
+
153
+ def validate_config(self):
154
+ # If any required fields are missing, raise an error
155
+ # required_fields = [
156
+ # "vectorstore", "llm_params", "chat_logging", "splitter_options",
157
+ # "retriever", "metadata", "token_config", "misc", "api_config"
158
+ # ]
159
+ # for field in required_fields:
160
+ # if not hasattr(self.config, field):
161
+ # raise ValueError(f"Missing required configuration field: {field}")
162
+
163
+ # # Validate types of specific fields
164
+ # if not isinstance(self.config.vectorstore, VectorStoreConfig):
165
+ # raise TypeError("vectorstore must be an instance of VectorStoreConfig")
166
+ # if not isinstance(self.config.llm_params, LLMParams):
167
+ # raise TypeError("llm_params must be an instance of LLMParams")
168
+ pass
169
+
170
+
171
+ class ConfigWrapper:
172
+ def __init__(self, config: Config):
173
+ self._config = config
174
+
175
+ def __getitem__(self, key):
176
+ return getattr(self._config, key)
177
+
178
+ def __getattr__(self, name):
179
+ return getattr(self._config, name)
180
+
181
+ def dict(self):
182
+ return self._config.dict()
183
+
184
+
185
+ # Usage
186
+ config_manager = ConfigManager(
187
+ config_path="config/config.yml", project_config_path="config/project_config.yml"
188
+ )
189
+ # config = config_manager.get_config().dict()
{code/modules β†’ apps/ai_tutor}/config/constants.py RENAMED
@@ -3,15 +3,6 @@ import os
3
 
4
  load_dotenv()
5
 
6
- TIMEOUT = 60
7
- COOLDOWN_TIME = 60
8
- REGEN_TIME = 180
9
- TOKENS_LEFT = 2000
10
- ALL_TIME_TOKENS_ALLOCATED = 1000000
11
-
12
- GITHUB_REPO = "https://github.com/DL4DS/dl4ds_tutor"
13
- DOCS_WEBSITE = "https://dl4ds.github.io/dl4ds_tutor/"
14
-
15
  # API Keys - Loaded from the .env file
16
 
17
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -20,6 +11,7 @@ HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
20
  LITERAL_API_KEY_LOGGING = os.getenv("LITERAL_API_KEY_LOGGING")
21
  LITERAL_API_URL = os.getenv("LITERAL_API_URL")
22
  CHAINLIT_URL = os.getenv("CHAINLIT_URL")
 
23
 
24
  OAUTH_GOOGLE_CLIENT_ID = os.getenv("OAUTH_GOOGLE_CLIENT_ID")
25
  OAUTH_GOOGLE_CLIENT_SECRET = os.getenv("OAUTH_GOOGLE_CLIENT_SECRET")
 
3
 
4
  load_dotenv()
5
 
 
 
 
 
 
 
 
 
 
6
  # API Keys - Loaded from the .env file
7
 
8
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
11
  LITERAL_API_KEY_LOGGING = os.getenv("LITERAL_API_KEY_LOGGING")
12
  LITERAL_API_URL = os.getenv("LITERAL_API_URL")
13
  CHAINLIT_URL = os.getenv("CHAINLIT_URL")
14
+ EMAIL_ENCRYPTION_KEY = os.getenv("EMAIL_ENCRYPTION_KEY")
15
 
16
  OAUTH_GOOGLE_CLIENT_ID = os.getenv("OAUTH_GOOGLE_CLIENT_ID")
17
  OAUTH_GOOGLE_CLIENT_SECRET = os.getenv("OAUTH_GOOGLE_CLIENT_SECRET")
apps/ai_tutor/config/project_config.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ retriever:
2
+ retriever_hf_paths:
3
+ RAGatouille: "XThomasBU/Colbert_Index"
4
+
5
+ metadata:
6
+ metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
+ slide_base_link: "https://dl4ds.github.io"
8
+
9
+ token_config:
10
+ cooldown_time: 60
11
+ regen_time: 180
12
+ tokens_left: 2000
13
+ all_time_tokens_allocated: 1000000
14
+
15
+ misc:
16
+ github_repo: "https://github.com/DL4DS/dl4ds_tutor"
17
+ docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
18
+
19
+ api_config:
20
+ timeout: 60
{code/modules β†’ apps/ai_tutor}/config/prompts.py RENAMED
File without changes
apps/ai_tutor/encrypt_students.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import hashlib
3
+ import json
4
+ import argparse
5
+ from dotenv import load_dotenv
6
+
7
+
8
+ # Function to deterministically hash emails
9
+ def deterministic_hash(email, salt):
10
+ return hashlib.pbkdf2_hmac("sha256", email.encode(), salt, 100000).hex()
11
+
12
+
13
+ def main(args):
14
+ # Load the .env file
15
+ load_dotenv()
16
+
17
+ # Get the encryption key (salt)
18
+ encryption_salt = os.getenv("EMAIL_ENCRYPTION_KEY").encode()
19
+
20
+ # Load emails from the specified JSON file
21
+ with open(args.students_file, "r") as file:
22
+ emails = json.load(file)
23
+
24
+ # Replace emails with deterministic hashed emails, {hashed_email: [roles]}
25
+ hashed_emails = {
26
+ deterministic_hash(email, encryption_salt): roles
27
+ for email, roles in emails.items()
28
+ }
29
+
30
+ # Save hashed emails to the specified encrypted JSON file
31
+ with open(args.encrypted_students_file, "w") as file:
32
+ json.dump(hashed_emails, file)
33
+
34
+
35
+ if __name__ == "__main__":
36
+ parser = argparse.ArgumentParser(
37
+ description="Encrypt student emails in a JSON file."
38
+ )
39
+ parser.add_argument(
40
+ "--students-file",
41
+ type=str,
42
+ default="private/students.json",
43
+ help="Path to the students JSON file",
44
+ )
45
+ parser.add_argument(
46
+ "--encrypted-students-file",
47
+ type=str,
48
+ default="public/files/students_encrypted.json",
49
+ help="Path to save the encrypted students JSON file",
50
+ )
51
+ args = parser.parse_args()
52
+
53
+ main(args)
apps/ai_tutor/helpers.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta, timezone
2
+ import tiktoken
3
+ from modules.chat_processor.helpers import update_user_info, convert_to_dict
4
+
5
+
6
+ def get_time():
7
+ return datetime.now(timezone.utc).isoformat()
8
+
9
+
10
+ async def check_user_cooldown(
11
+ user_info, current_time, COOLDOWN_TIME, TOKENS_LEFT, REGEN_TIME
12
+ ):
13
+ # # Check if no tokens left
14
+ tokens_left = user_info.metadata.get("tokens_left", 0)
15
+ if tokens_left > 0 and not user_info.metadata.get("in_cooldown", False):
16
+ return False, None
17
+
18
+ user_info = convert_to_dict(user_info)
19
+ last_message_time_str = user_info["metadata"].get("last_message_time")
20
+
21
+ # Convert from ISO format string to datetime object and ensure UTC timezone
22
+ last_message_time = datetime.fromisoformat(last_message_time_str).replace(
23
+ tzinfo=timezone.utc
24
+ )
25
+ current_time = datetime.fromisoformat(current_time).replace(tzinfo=timezone.utc)
26
+
27
+ # Calculate the elapsed time
28
+ elapsed_time = current_time - last_message_time
29
+ elapsed_time_in_seconds = elapsed_time.total_seconds()
30
+
31
+ # Calculate when the cooldown period ends
32
+ cooldown_end_time = last_message_time + timedelta(seconds=COOLDOWN_TIME)
33
+ cooldown_end_time_iso = cooldown_end_time.isoformat()
34
+
35
+ # Check if the user is still in cooldown
36
+ if elapsed_time_in_seconds < COOLDOWN_TIME:
37
+ return True, cooldown_end_time_iso # Return in ISO 8601 format
38
+
39
+ user_info["metadata"]["in_cooldown"] = False
40
+ # If not in cooldown, regenerate tokens
41
+ await reset_tokens_for_user(user_info, TOKENS_LEFT, REGEN_TIME)
42
+
43
+ return False, None
44
+
45
+
46
+ async def reset_tokens_for_user(user_info, TOKENS_LEFT, REGEN_TIME):
47
+ user_info = convert_to_dict(user_info)
48
+ last_message_time_str = user_info["metadata"].get("last_message_time")
49
+
50
+ last_message_time = datetime.fromisoformat(last_message_time_str).replace(
51
+ tzinfo=timezone.utc
52
+ )
53
+ current_time = datetime.fromisoformat(get_time()).replace(tzinfo=timezone.utc)
54
+
55
+ # Calculate the elapsed time since the last message
56
+ elapsed_time_in_seconds = (current_time - last_message_time).total_seconds()
57
+
58
+ # Current token count (can be negative)
59
+ current_tokens = user_info["metadata"].get("tokens_left_at_last_message", 0)
60
+ current_tokens = min(current_tokens, TOKENS_LEFT)
61
+
62
+ # Maximum tokens that can be regenerated
63
+ max_tokens = user_info["metadata"].get("max_tokens", TOKENS_LEFT)
64
+
65
+ # Calculate how many tokens should have been regenerated proportionally
66
+ if current_tokens < max_tokens:
67
+ # Calculate the regeneration rate per second based on REGEN_TIME for full regeneration
68
+ # If current_tokens is close to 0, then the regeneration rate is relatively high, and if current_tokens is close to max_tokens, then the regeneration rate is relatively low
69
+ regeneration_rate_per_second = (
70
+ max_tokens - max(current_tokens, 0)
71
+ ) / REGEN_TIME
72
+
73
+ # Calculate how many tokens should have been regenerated based on the elapsed time
74
+ tokens_to_regenerate = int(
75
+ elapsed_time_in_seconds * regeneration_rate_per_second
76
+ )
77
+
78
+ # Ensure the new token count does not exceed max_tokens
79
+ new_token_count = min(current_tokens + tokens_to_regenerate, max_tokens)
80
+
81
+ # Update the user's token count
82
+ user_info["metadata"]["tokens_left"] = new_token_count
83
+
84
+ await update_user_info(user_info)
85
+
86
+
87
+ def get_num_tokens(text, model):
88
+ encoding = tiktoken.encoding_for_model(model)
89
+ tokens = encoding.encode(text)
90
+ return len(tokens)
apps/ai_tutor/private/placeholder_students_file.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "[email protected]": ["student", "bu"],
3
+ "[email protected]": ["student", "bu"],
4
+ "[email protected]": ["admin", "instructor", "bu"]
5
+ }
code/public/avatars/ai_tutor.png β†’ apps/ai_tutor/public/assets/images/avatars/ai-tutor.png RENAMED
File without changes
code/public/logo_dark.png β†’ apps/ai_tutor/public/assets/images/avatars/ai_tutor.png RENAMED
File without changes
{code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/acastusphoton-svgrepo-com.svg RENAMED
File without changes
{code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/adv-screen-recorder-svgrepo-com.svg RENAMED
File without changes
{code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/alarmy-svgrepo-com.svg RENAMED
File without changes
{code/public β†’ apps/ai_tutor/public/assets/images/starter_icons}/calendar-samsung-17-svgrepo-com.svg RENAMED
File without changes
apps/ai_tutor/public/files/students_encrypted.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": ["instructor", "bu"], "51ebf87ac51618300acfef8bfa9768fdee40e2d3f39cfb4ae8a76722ee336de4": ["admin", "instructor", "bu"], "7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": ["instructor", "bu"], "a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": ["student", "bu"]}
apps/ai_tutor/public/files/test.css ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a[href*='https://github.com/Chainlit/chainlit'] {
2
+ visibility: hidden;
3
+ }
4
+
5
+ /* Hide the default avatar image */
6
+ .MuiAvatar-root img.MuiAvatar-img {
7
+ display: none;
8
+ }
9
+
10
+ /* Target the container of the image and set a custom background image */
11
+ .MuiAvatar-root.MuiAvatar-circular.css-m2icte {
12
+ background-image: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with your custom image URL */
13
+ background-size: cover; /* Ensure the image covers the entire container */
14
+ background-position: center; /* Center the image */
15
+ width: 100px; /* Ensure the dimensions match the original */
16
+ height: 100px; /* Ensure the dimensions match the original */
17
+ border-radius: 50%; /* Maintain circular shape */
18
+ }
19
+ .MuiAvatar-root.MuiAvatar-circular.css-v72an7 {
20
+ background-image: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with your custom image URL */
21
+ background-size: cover; /* Ensure the image covers the entire container */
22
+ background-position: center; /* Center the image */
23
+ width: 40px; /* Ensure the dimensions match the original */
24
+ height: 40px; /* Ensure the dimensions match the original */
25
+ border-radius: 50%; /* Maintain circular shape */
26
+ }
27
+
28
+ .MuiStack-root.css-14k6mw7 img {
29
+ content: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with the path to your custom image */
30
+ max-height: 45px; /* Ensure the height remains consistent */
31
+ max-width: 45px; /* Ensure the width remains consistent */
32
+ }
code/public/logo_light.png β†’ apps/ai_tutor/public/logo_dark.png RENAMED
File without changes
apps/ai_tutor/public/logo_light.png ADDED
{storage β†’ apps/ai_tutor/storage}/data/urls.txt RENAMED
File without changes
{code β†’ apps/ai_tutor}/templates/cooldown.html RENAMED
@@ -121,7 +121,7 @@
121
  </head>
122
  <body>
123
  <div class="container">
124
- <img src="/public/avatars/ai_tutor.png" alt="AI Tutor Avatar" class="avatar">
125
  <h1>Hello, {{ username }}</h1>
126
  <p>It seems like you need to wait a bit before starting a new session.</p>
127
  <p class="cooldown-message">Time remaining until the cooldown period ends:</p>
 
121
  </head>
122
  <body>
123
  <div class="container">
124
+ <img src="/public/assets/images/avatars/ai-tutor.png" alt="AI Tutor Avatar" class="avatar">
125
  <h1>Hello, {{ username }}</h1>
126
  <p>It seems like you need to wait a bit before starting a new session.</p>
127
  <p class="cooldown-message">Time remaining until the cooldown period ends:</p>
{code β†’ apps/ai_tutor}/templates/dashboard.html RENAMED
@@ -123,7 +123,7 @@
123
  </head>
124
  <body>
125
  <div class="container">
126
- <img src="/public/avatars/ai_tutor.png" alt="AI Tutor Avatar" class="avatar">
127
  <h1>Welcome, {{ username }}</h1>
128
  <p>Ready to start your AI tutoring session?</p>
129
  <p class="tokens-left">Tokens Left: {{ tokens_left }}</p>
 
123
  </head>
124
  <body>
125
  <div class="container">
126
+ <img src="/public/assets/images/avatars/ai-tutor.png" alt="AI Tutor Avatar" class="avatar">
127
  <h1>Welcome, {{ username }}</h1>
128
  <p>Ready to start your AI tutoring session?</p>
129
  <p class="tokens-left">Tokens Left: {{ tokens_left }}</p>
{code β†’ apps/ai_tutor}/templates/error.html RENAMED
File without changes
{code β†’ apps/ai_tutor}/templates/error_404.html RENAMED
File without changes
{code β†’ apps/ai_tutor}/templates/login.html RENAMED
@@ -107,7 +107,7 @@
107
  </head>
108
  <body>
109
  <div class="container">
110
- <img src="/public/avatars/ai_tutor.png" alt="AI Tutor Avatar" class="avatar">
111
  <h1>Terrier Tutor</h1>
112
  <p>Welcome to the DS598 AI Tutor. Please sign in to continue.</p>
113
  <form action="/login/google" method="get">
 
107
  </head>
108
  <body>
109
  <div class="container">
110
+ <img src="/public/assets/images/avatars/ai-tutor.png" alt="AI Tutor Avatar" class="avatar">
111
  <h1>Terrier Tutor</h1>
112
  <p>Welcome to the DS598 AI Tutor. Please sign in to continue.</p>
113
  <form action="/login/google" method="get">
{code β†’ apps/ai_tutor}/templates/logout.html RENAMED
File without changes
apps/ai_tutor/templates/unauthorized.html ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Access Restricted</title>
7
+ <style>
8
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');
9
+
10
+ body, html {
11
+ margin: 0;
12
+ padding: 0;
13
+ font-family: 'Inter', sans-serif;
14
+ background-color: #f7f7f7; /* Light gray background */
15
+ background-image: url('https://www.transparenttextures.com/patterns/cubes.png'); /* Subtle geometric pattern */
16
+ background-repeat: repeat;
17
+ display: flex;
18
+ align-items: center;
19
+ justify-content: center;
20
+ height: 100vh;
21
+ color: #333;
22
+ }
23
+
24
+ .container {
25
+ background: rgba(255, 255, 255, 0.9);
26
+ border: 1px solid #ddd;
27
+ border-radius: 8px;
28
+ width: 100%;
29
+ max-width: 400px;
30
+ padding: 50px;
31
+ box-sizing: border-box;
32
+ text-align: center;
33
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
34
+ backdrop-filter: blur(10px);
35
+ -webkit-backdrop-filter: blur(10px);
36
+ }
37
+
38
+ .avatar {
39
+ width: 90px;
40
+ height: 90px;
41
+ border-radius: 50%;
42
+ margin-bottom: 25px;
43
+ border: 2px solid #ddd;
44
+ }
45
+
46
+ .container h1 {
47
+ margin-bottom: 20px;
48
+ font-size: 26px;
49
+ font-weight: 600;
50
+ color: #1a1a1a;
51
+ }
52
+
53
+ .container p {
54
+ font-size: 18px;
55
+ color: #4a4a4a;
56
+ margin-bottom: 35px;
57
+ line-height: 1.5;
58
+ }
59
+
60
+ .button {
61
+ padding: 14px 0;
62
+ margin: 12px 0;
63
+ font-size: 16px;
64
+ border-radius: 6px;
65
+ cursor: pointer;
66
+ width: 100%;
67
+ border: 1px solid #ccc;
68
+ background-color: #007BFF;
69
+ color: #fff;
70
+ transition: background-color 0.3s ease, border-color 0.3s ease;
71
+ }
72
+
73
+ .button:hover {
74
+ background-color: #0056b3;
75
+ border-color: #0056b3;
76
+ }
77
+ </style>
78
+ </head>
79
+ <body>
80
+ <div class="container">
81
+ <img src="/public/assets/images/avatars/ai-tutor.png" alt="AI Tutor Avatar" class="avatar">
82
+ <h1>Access Restricted</h1>
83
+ <p>
84
+ We're currently testing things out for the <strong>DS598</strong> course.
85
+ Access is restricted to students of the course. If you're enrolled in <strong>DS598</strong> and seeing this message,
86
+ please reach out to us, and we'll help you get access.<br><br>
87
+ <em>P.S. Don't forget to use your BU email when logging in!</em>
88
+ </p>
89
+ <form action="/" method="get">
90
+ <button type="submit" class="button">Return to Home</button>
91
+ </form>
92
+ </div>
93
+ </body>
94
+ </html>
apps/chainlit_base/.chainlit/config.toml ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+
6
+ # List of environment variables to be provided by each user to use the app.
7
+ user_env = []
8
+
9
+ # Duration (in seconds) during which the session is saved when the connection is lost
10
+ session_timeout = 3600
11
+
12
+ # Enable third parties caching (e.g LangChain cache)
13
+ cache = false
14
+
15
+ # Authorized origins
16
+ allow_origins = ["*"]
17
+
18
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
19
+ # follow_symlink = false
20
+
21
+ [features]
22
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
23
+ unsafe_allow_html = false
24
+
25
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
26
+ latex = false
27
+
28
+ # Automatically tag threads with the current chat profile (if a chat profile is used)
29
+ auto_tag_thread = true
30
+
31
+ # Authorize users to spontaneously upload files with messages
32
+ [features.spontaneous_file_upload]
33
+ enabled = true
34
+ accept = ["*/*"]
35
+ max_files = 20
36
+ max_size_mb = 500
37
+
38
+ [features.audio]
39
+ # Threshold for audio recording
40
+ min_decibels = -45
41
+ # Delay for the user to start speaking in MS
42
+ initial_silence_timeout = 3000
43
+ # Delay for the user to continue speaking in MS. If the user stops speaking for this duration, the recording will stop.
44
+ silence_timeout = 1500
45
+ # Above this duration (MS), the recording will forcefully stop.
46
+ max_duration = 15000
47
+ # Duration of the audio chunks in MS
48
+ chunk_duration = 1000
49
+ # Sample rate of the audio
50
+ sample_rate = 44100
51
+
52
+ edit_message = true
53
+
54
+ [UI]
55
+ # Name of the assistant.
56
+ name = "Assistant"
57
+
58
+ # Description of the assistant. This is used for HTML tags.
59
+ # description = ""
60
+
61
+ # Large size content are by default collapsed for a cleaner ui
62
+ default_collapse_content = true
63
+
64
+ # Chain of Thought (CoT) display mode. Can be "hidden", "tool_call" or "full".
65
+ cot = "full"
66
+
67
+ # Link to your github repo. This will add a github button in the UI's header.
68
+ # github = ""
69
+
70
+ # Specify a CSS file that can be used to customize the user interface.
71
+ # The CSS file can be served from the public directory or via an external link.
72
+ custom_css = "/public/files/test.css"
73
+
74
+ # Specify a Javascript file that can be used to customize the user interface.
75
+ # The Javascript file can be served from the public directory.
76
+ # custom_js = "/public/test.js"
77
+
78
+ # Specify a custom font url.
79
+ # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
80
+
81
+ # Specify a custom meta image url.
82
+ # custom_meta_image_url = "https://chainlit-cloud.s3.eu-west-3.amazonaws.com/logo/chainlit_banner.png"
83
+
84
+ # Specify a custom build directory for the frontend.
85
+ # This can be used to customize the frontend code.
86
+ # Be careful: If this is a relative path, it should not start with a slash.
87
+ # custom_build = "./public/build"
88
+
89
+ [UI.theme]
90
+ default = "dark"
91
+ #layout = "wide"
92
+ #font_family = "Inter, sans-serif"
93
+ # Override default MUI light theme. (Check theme.ts)
94
+ [UI.theme.light]
95
+ #background = "#FAFAFA"
96
+ #paper = "#FFFFFF"
97
+
98
+ [UI.theme.light.primary]
99
+ #main = "#F80061"
100
+ #dark = "#980039"
101
+ #light = "#FFE7EB"
102
+ [UI.theme.light.text]
103
+ #primary = "#212121"
104
+ #secondary = "#616161"
105
+
106
+ # Override default MUI dark theme. (Check theme.ts)
107
+ [UI.theme.dark]
108
+ #background = "#FAFAFA"
109
+ #paper = "#FFFFFF"
110
+
111
+ [UI.theme.dark.primary]
112
+ #main = "#F80061"
113
+ #dark = "#980039"
114
+ #light = "#FFE7EB"
115
+ [UI.theme.dark.text]
116
+ #primary = "#EEEEEE"
117
+ #secondary = "#BDBDBD"
118
+
119
+ [meta]
120
+ generated_by = "1.1.402"
apps/chainlit_base/chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! πŸš€πŸ€–
2
+
3
+ Hi there, Developer! πŸ‘‹ We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links πŸ”—
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) πŸ“š
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! πŸ’¬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! πŸ’»πŸ˜Š
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
{code β†’ apps/chainlit_base}/chainlit_base.py RENAMED
@@ -1,19 +1,15 @@
1
  import chainlit.data as cl_data
2
  import asyncio
3
- import yaml
4
  from typing import Any, Dict, no_type_check
5
  import chainlit as cl
6
  from modules.chat.llm_tutor import LLMTutor
7
  from modules.chat.helpers import (
8
  get_sources,
9
- get_history_chat_resume,
10
  get_history_setup_llm,
11
- get_last_config,
12
  )
13
  import copy
14
- from chainlit.types import ThreadDict
15
- import time
16
  from langchain_community.callbacks import get_openai_callback
 
17
 
18
  USER_TIMEOUT = 60_000
19
  SYSTEM = "System"
@@ -22,22 +18,7 @@ AGENT = "Agent"
22
  YOU = "User"
23
  ERROR = "Error"
24
 
25
- with open("modules/config/config.yml", "r") as f:
26
- config = yaml.safe_load(f)
27
-
28
-
29
- # async def setup_data_layer():
30
- # """
31
- # Set up the data layer for chat logging.
32
- # """
33
- # if config["chat_logging"]["log_chat"]:
34
- # data_layer = CustomLiteralDataLayer(
35
- # api_key=LITERAL_API_KEY_LOGGING, server=LITERAL_API_URL
36
- # )
37
- # else:
38
- # data_layer = None
39
-
40
- # return data_layer
41
 
42
 
43
  class Chatbot:
@@ -47,13 +28,6 @@ class Chatbot:
47
  """
48
  self.config = config
49
 
50
- async def _load_config(self):
51
- """
52
- Load the configuration from a YAML file.
53
- """
54
- with open("modules/config/config.yml", "r") as f:
55
- return yaml.safe_load(f)
56
-
57
  @no_type_check
58
  async def setup_llm(self):
59
  """
@@ -61,7 +35,6 @@ class Chatbot:
61
 
62
  #TODO: Clean this up.
63
  """
64
- start_time = time.time()
65
 
66
  llm_settings = cl.user_session.get("llm_settings", {})
67
  (
@@ -109,8 +82,6 @@ class Chatbot:
109
  cl.user_session.set("chain", self.chain)
110
  cl.user_session.set("llm_tutor", self.llm_tutor)
111
 
112
- print("Time taken to setup LLM: ", time.time() - start_time)
113
-
114
  @no_type_check
115
  async def update_llm(self, new_settings: Dict[str, Any]):
116
  """
@@ -193,70 +164,38 @@ class Chatbot:
193
  """
194
  Inform the user about the updated LLM settings and display them as a message.
195
  """
196
- llm_settings: Dict[str, Any] = cl.user_session.get("llm_settings", {})
197
- llm_tutor = cl.user_session.get("llm_tutor")
198
- settings_dict = {
199
- "model": llm_settings.get("chat_model"),
200
- "retriever": llm_settings.get("retriever_method"),
201
- "memory_window": llm_settings.get("memory_window"),
202
- "num_docs_in_db": (
203
- len(llm_tutor.vector_db)
204
- if llm_tutor and hasattr(llm_tutor, "vector_db")
205
- else 0
206
- ),
207
- "view_sources": llm_settings.get("view_sources"),
208
- "follow_up_questions": llm_settings.get("follow_up_questions"),
209
- }
210
- print("Settings Dict: ", settings_dict)
211
  await cl.Message(
212
  author=SYSTEM,
213
  content="LLM settings have been updated. You can continue with your Query!",
214
- # elements=[
215
- # cl.Text(
216
- # name="settings",
217
- # display="side",
218
- # content=json.dumps(settings_dict, indent=4),
219
- # language="json",
220
- # ),
221
- # ],
222
  ).send()
223
 
224
  async def set_starters(self):
225
  """
226
  Set starter messages for the chatbot.
227
  """
228
- # Return Starters only if the chat is new
229
-
230
- try:
231
- thread = cl_data._data_layer.get_thread(
232
- cl.context.session.thread_id
233
- ) # see if the thread has any steps
234
- if thread.steps or len(thread.steps) > 0:
235
- return None
236
- except Exception as e:
237
- print(e)
238
- return [
239
- cl.Starter(
240
- label="recording on CNNs?",
241
- message="Where can I find the recording for the lecture on Transformers?",
242
- icon="/public/adv-screen-recorder-svgrepo-com.svg",
243
- ),
244
- cl.Starter(
245
- label="where's the slides?",
246
- message="When are the lectures? I can't find the schedule.",
247
- icon="/public/alarmy-svgrepo-com.svg",
248
- ),
249
- cl.Starter(
250
- label="Due Date?",
251
- message="When is the final project due?",
252
- icon="/public/calendar-samsung-17-svgrepo-com.svg",
253
- ),
254
- cl.Starter(
255
- label="Explain backprop.",
256
- message="I didn't understand the math behind backprop, could you explain it?",
257
- icon="/public/acastusphoton-svgrepo-com.svg",
258
- ),
259
- ]
260
 
261
  def rename(self, orig_author: str):
262
  """
@@ -271,34 +210,19 @@ class Chatbot:
271
  rename_dict = {"Chatbot": LLM}
272
  return rename_dict.get(orig_author, orig_author)
273
 
274
- async def start(self, config=None):
275
  """
276
  Start the chatbot, initialize settings widgets,
277
  and display and load previous conversation if chat logging is enabled.
278
  """
279
 
280
- start_time = time.time()
281
-
282
- self.config = (
283
- await self._load_config() if config is None else config
284
- ) # Reload the configuration on chat resume
285
-
286
  await self.make_llm_settings_widgets(self.config) # Reload the settings widgets
287
 
288
- user = cl.user_session.get("user")
289
-
290
  # TODO: remove self.user with cl.user_session.get("user")
291
- try:
292
- self.user = {
293
- "user_id": user.identifier,
294
- "session_id": cl.context.session.thread_id,
295
- }
296
- except Exception as e:
297
- print(e)
298
- self.user = {
299
- "user_id": "guest",
300
- "session_id": cl.context.session.thread_id,
301
- }
302
 
303
  memory = cl.user_session.get("memory", [])
304
  self.llm_tutor = LLMTutor(self.config, user=self.user)
@@ -310,8 +234,6 @@ class Chatbot:
310
  cl.user_session.set("llm_tutor", self.llm_tutor)
311
  cl.user_session.set("chain", self.chain)
312
 
313
- print("Time taken to start LLM: ", time.time() - start_time)
314
-
315
  async def stream_response(self, response):
316
  """
317
  Stream the response from the LLM.
@@ -342,8 +264,6 @@ class Chatbot:
342
  message: The incoming chat message.
343
  """
344
 
345
- start_time = time.time()
346
-
347
  chain = cl.user_session.get("chain")
348
  token_count = 0 # initialize token count
349
  if not chain:
@@ -389,12 +309,9 @@ class Chatbot:
389
  )
390
  answer_with_sources = answer_with_sources.replace("$$", "$")
391
 
392
- print("Time taken to process the message: ", time.time() - start_time)
393
-
394
  actions = []
395
 
396
  if self.config["llm_params"]["generate_follow_up"]:
397
- start_time = time.time()
398
  cb_follow_up = cl.AsyncLangchainCallbackHandler()
399
  config = {
400
  "callbacks": (
@@ -424,30 +341,13 @@ class Chatbot:
424
  )
425
  )
426
 
427
- print("Time taken to generate questions: ", time.time() - start_time)
428
- print("Total Tokens Used: ", token_count)
429
-
430
  await cl.Message(
431
  content=answer_with_sources,
432
  elements=source_elements,
433
  author=LLM,
434
  actions=actions,
435
- metadata=self.config,
436
  ).send()
437
 
438
- async def on_chat_resume(self, thread: ThreadDict):
439
- thread_config = None
440
- steps = thread["steps"]
441
- k = self.config["llm_params"][
442
- "memory_window"
443
- ] # on resume, alwyas use the default memory window
444
- conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
445
- thread_config = get_last_config(
446
- steps
447
- ) # TODO: Returns None for now - which causes config to be reloaded with default values
448
- cl.user_session.set("memory", conversation_list)
449
- await self.start(config=thread_config)
450
-
451
  async def on_follow_up(self, action: cl.Action):
452
  user = cl.user_session.get("user")
453
  message = await cl.Message(
@@ -466,12 +366,9 @@ chatbot = Chatbot(config=config)
466
 
467
 
468
  async def start_app():
469
- # cl_data._data_layer = await setup_data_layer()
470
- # chatbot.literal_client = cl_data._data_layer.client if cl_data._data_layer else None
471
  cl.set_starters(chatbot.set_starters)
472
  cl.author_rename(chatbot.rename)
473
  cl.on_chat_start(chatbot.start)
474
- cl.on_chat_resume(chatbot.on_chat_resume)
475
  cl.on_message(chatbot.main)
476
  cl.on_settings_update(chatbot.update_llm)
477
  cl.action_callback("follow up question")(chatbot.on_follow_up)
 
1
  import chainlit.data as cl_data
2
  import asyncio
 
3
  from typing import Any, Dict, no_type_check
4
  import chainlit as cl
5
  from modules.chat.llm_tutor import LLMTutor
6
  from modules.chat.helpers import (
7
  get_sources,
 
8
  get_history_setup_llm,
 
9
  )
10
  import copy
 
 
11
  from langchain_community.callbacks import get_openai_callback
12
+ from config.config_manager import config_manager
13
 
14
  USER_TIMEOUT = 60_000
15
  SYSTEM = "System"
 
18
  YOU = "User"
19
  ERROR = "Error"
20
 
21
+ config = config_manager.get_config().dict()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  class Chatbot:
 
28
  """
29
  self.config = config
30
 
 
 
 
 
 
 
 
31
  @no_type_check
32
  async def setup_llm(self):
33
  """
 
35
 
36
  #TODO: Clean this up.
37
  """
 
38
 
39
  llm_settings = cl.user_session.get("llm_settings", {})
40
  (
 
82
  cl.user_session.set("chain", self.chain)
83
  cl.user_session.set("llm_tutor", self.llm_tutor)
84
 
 
 
85
  @no_type_check
86
  async def update_llm(self, new_settings: Dict[str, Any]):
87
  """
 
164
  """
165
  Inform the user about the updated LLM settings and display them as a message.
166
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  await cl.Message(
168
  author=SYSTEM,
169
  content="LLM settings have been updated. You can continue with your Query!",
 
 
 
 
 
 
 
 
170
  ).send()
171
 
172
  async def set_starters(self):
173
  """
174
  Set starter messages for the chatbot.
175
  """
176
+
177
+ return [
178
+ cl.Starter(
179
+ label="recording on Transformers?",
180
+ message="Where can I find the recording for the lecture on Transformers?",
181
+ icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
182
+ ),
183
+ cl.Starter(
184
+ label="where's the slides?",
185
+ message="When are the lectures? I can't find the schedule.",
186
+ icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
187
+ ),
188
+ cl.Starter(
189
+ label="Due Date?",
190
+ message="When is the final project due?",
191
+ icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
192
+ ),
193
+ cl.Starter(
194
+ label="Explain backprop.",
195
+ message="I didn't understand the math behind backprop, could you explain it?",
196
+ icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
197
+ ),
198
+ ]
 
 
 
 
 
 
 
 
 
199
 
200
  def rename(self, orig_author: str):
201
  """
 
210
  rename_dict = {"Chatbot": LLM}
211
  return rename_dict.get(orig_author, orig_author)
212
 
213
+ async def start(self):
214
  """
215
  Start the chatbot, initialize settings widgets,
216
  and display and load previous conversation if chat logging is enabled.
217
  """
218
 
 
 
 
 
 
 
219
  await self.make_llm_settings_widgets(self.config) # Reload the settings widgets
220
 
 
 
221
  # TODO: remove self.user with cl.user_session.get("user")
222
+ self.user = {
223
+ "user_id": "guest",
224
+ "session_id": cl.context.session.thread_id,
225
+ }
 
 
 
 
 
 
 
226
 
227
  memory = cl.user_session.get("memory", [])
228
  self.llm_tutor = LLMTutor(self.config, user=self.user)
 
234
  cl.user_session.set("llm_tutor", self.llm_tutor)
235
  cl.user_session.set("chain", self.chain)
236
 
 
 
237
  async def stream_response(self, response):
238
  """
239
  Stream the response from the LLM.
 
264
  message: The incoming chat message.
265
  """
266
 
 
 
267
  chain = cl.user_session.get("chain")
268
  token_count = 0 # initialize token count
269
  if not chain:
 
309
  )
310
  answer_with_sources = answer_with_sources.replace("$$", "$")
311
 
 
 
312
  actions = []
313
 
314
  if self.config["llm_params"]["generate_follow_up"]:
 
315
  cb_follow_up = cl.AsyncLangchainCallbackHandler()
316
  config = {
317
  "callbacks": (
 
341
  )
342
  )
343
 
 
 
 
344
  await cl.Message(
345
  content=answer_with_sources,
346
  elements=source_elements,
347
  author=LLM,
348
  actions=actions,
 
349
  ).send()
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  async def on_follow_up(self, action: cl.Action):
352
  user = cl.user_session.get("user")
353
  message = await cl.Message(
 
366
 
367
 
368
  async def start_app():
 
 
369
  cl.set_starters(chatbot.set_starters)
370
  cl.author_rename(chatbot.rename)
371
  cl.on_chat_start(chatbot.start)
 
372
  cl.on_message(chatbot.main)
373
  cl.on_settings_update(chatbot.update_llm)
374
  cl.action_callback("follow up question")(chatbot.on_follow_up)
apps/chainlit_base/config/config.yml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: 'storage/logs' # str
2
+ log_chunk_dir: 'storage/logs/chunks' # str
3
+ device: 'cpu' # str [cuda, cpu]
4
+
5
+ vectorstore:
6
+ load_from_HF: True # bool
7
+ reparse_files: True # bool
8
+ data_path: 'storage/data' # str
9
+ url_file_path: 'storage/data/urls.txt' # str
10
+ expand_urls: True # bool
11
+ db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
+ db_path : 'vectorstores' # str
13
+ model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
14
+ search_top_k : 3 # int
15
+ score_threshold : 0.2 # float
16
+
17
+ faiss_params: # Not used as of now
18
+ index_path: 'vectorstores/faiss.index' # str
19
+ index_type: 'Flat' # str [Flat, HNSW, IVF]
20
+ index_dimension: 384 # int
21
+ index_nlist: 100 # int
22
+ index_nprobe: 10 # int
23
+
24
+ colbert_params:
25
+ index_name: "new_idx" # str
26
+
27
+ llm_params:
28
+ llm_arch: 'langchain' # [langchain]
29
+ use_history: True # bool
30
+ generate_follow_up: False # bool
31
+ memory_window: 3 # int
32
+ llm_style: 'Normal' # str [Normal, ELI5]
33
+ llm_loader: 'gpt-4o-mini' # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
34
+ openai_params:
35
+ temperature: 0.7 # float
36
+ local_llm_params:
37
+ temperature: 0.7 # float
38
+ repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
39
+ filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
40
+ model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Path to the model file
41
+ stream: False # bool
42
+ pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
43
+
44
+ chat_logging:
45
+ log_chat: True # bool
46
+ platform: 'literalai'
47
+ callbacks: True # bool
48
+
49
+ splitter_options:
50
+ use_splitter: True # bool
51
+ split_by_token : True # bool
52
+ remove_leftover_delimiters: True # bool
53
+ remove_chunks: False # bool
54
+ chunking_mode: 'semantic' # str [fixed, semantic]
55
+ chunk_size : 300 # int
56
+ chunk_overlap : 30 # int
57
+ chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
58
+ front_chunks_to_remove : null # int or None
59
+ last_chunks_to_remove : null # int or None
60
+ delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
apps/chainlit_base/config/config_manager.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, conint, confloat, HttpUrl
2
+ from typing import Optional, List
3
+ import yaml
4
+
5
+
6
class FaissParams(BaseModel):
    """FAISS vector-index settings (marked "Not used as of now" in config.yml)."""

    index_path: str = "vectorstores/faiss.index"
    index_type: str = "Flat"  # Options: [Flat, HNSW, IVF]
    index_dimension: conint(gt=0) = 384
    # IVF tuning knobs: number of clusters / clusters probed per query.
    index_nlist: conint(gt=0) = 100
    index_nprobe: conint(gt=0) = 10
12
+
13
+
14
class ColbertParams(BaseModel):
    """ColBERT (RAGatouille) index settings."""

    index_name: str = "new_idx"
16
+
17
+
18
class VectorStoreConfig(BaseModel):
    """Vector-store build/load settings (data sources, backend, retrieval cutoffs)."""

    # If True, pull a pre-built index from HuggingFace instead of building locally.
    load_from_HF: bool = True
    reparse_files: bool = True
    data_path: str = "storage/data"
    url_file_path: str = "storage/data/urls.txt"
    expand_urls: bool = True
    db_option: str = "RAGatouille"  # Options: [FAISS, Chroma, RAGatouille, RAPTOR]
    db_path: str = "vectorstores"
    # Embedding model name.
    model: str = (
        "sentence-transformers/all-MiniLM-L6-v2"  # Options: [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
    )
    # Number of documents returned per query.
    search_top_k: conint(gt=0) = 3
    # Minimum similarity score for a hit to be kept.
    score_threshold: confloat(ge=0.0, le=1.0) = 0.2

    # Backend-specific parameter groups; only the one matching db_option applies.
    faiss_params: Optional[FaissParams] = None
    colbert_params: Optional[ColbertParams] = None
34
+
35
+
36
class OpenAIParams(BaseModel):
    """Sampling parameters used when an OpenAI chat model is the LLM."""

    temperature: confloat(ge=0.0, le=1.0) = 0.7
38
+
39
+
40
class LocalLLMParams(BaseModel):
    """Settings for running a local GGUF model (llm_loader == 'local_llm')."""

    temperature: confloat(ge=0.0, le=1.0) = 0.7
    repo_id: str = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"  # HuggingFace repo id
    filename: str = (
        "tinyllama-1.1b-chat-v1.0.Q5_0.gguf"  # Specific name of gguf file in the repo
    )
    model_path: str = (
        "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf"  # Path to the model file
    )
49
+
50
+
51
class LLMParams(BaseModel):
    """LLM pipeline settings: architecture, history handling, and model choice."""

    llm_arch: str = "langchain"  # Options: [langchain]
    # Whether the chain receives prior conversation turns.
    use_history: bool = True
    generate_follow_up: bool = False
    # Number of past exchanges kept in conversational memory.
    memory_window: conint(ge=1) = 3
    llm_style: str = "Normal"  # Options: [Normal, ELI5]
    llm_loader: str = (
        "gpt-4o-mini"  # Options: [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
    )
    # Parameter groups per loader; only the one matching llm_loader applies.
    openai_params: Optional[OpenAIParams] = None
    local_llm_params: Optional[LocalLLMParams] = None
    stream: bool = False
    pdf_reader: str = "gpt"  # Options: [llama, pymupdf, gpt]
64
+
65
+
66
class ChatLoggingConfig(BaseModel):
    """Chat-transcript logging settings."""

    log_chat: bool = True
    platform: str = "literalai"
    # Whether platform callbacks are attached to the chain.
    callbacks: bool = True
70
+
71
+
72
class SplitterOptions(BaseModel):
    """Document chunking settings used when building the vector store."""

    use_splitter: bool = True
    # Measure chunk_size/chunk_overlap in tokens rather than characters.
    split_by_token: bool = True
    remove_leftover_delimiters: bool = True
    remove_chunks: bool = False
    chunking_mode: str = "semantic"  # Options: [fixed, semantic]
    chunk_size: conint(gt=0) = 300
    chunk_overlap: conint(ge=0) = 30
    # Separators tried in order when splitting.
    chunk_separators: List[str] = ["\n\n", "\n", " ", ""]
    # How many leading/trailing chunks to drop per document (None = keep all).
    front_chunks_to_remove: Optional[conint(ge=0)] = None
    last_chunks_to_remove: Optional[conint(ge=0)] = None
    delimiters_to_remove: List[str] = ["\t", "\n", " ", " "]
84
+
85
+
86
class RetrieverConfig(BaseModel):
    """Pre-built retriever indices hosted on HuggingFace Hub."""

    # Maps db_option name -> HF repo holding a pre-built index.
    retriever_hf_paths: dict[str, str] = {"RAGatouille": "XThomasBU/Colbert_Index"}
88
+
89
+
90
class MetadataConfig(BaseModel):
    """Course-site URLs used for lecture/schedule metadata."""

    # Pages consulted for metadata.
    metadata_links: List[HttpUrl] = [
        "https://dl4ds.github.io/sp2024/lectures/",
        "https://dl4ds.github.io/sp2024/schedule/",
    ]
    # Base URL that slide links are built from.
    slide_base_link: HttpUrl = "https://dl4ds.github.io"
96
+
97
+
98
class APIConfig(BaseModel):
    """External API call settings."""

    # Request timeout; presumably seconds — confirm at the call site.
    timeout: conint(gt=0) = 60
100
+
101
+
102
class Config(BaseModel):
    """Top-level application configuration, built from the merged base
    (config.yml) and project (project_config.yml) YAML files."""

    log_dir: str = "storage/logs"
    log_chunk_dir: str = "storage/logs/chunks"
    device: str = "cpu"  # Options: ['cuda', 'cpu']

    # Required sections; pydantic raises ValidationError if any is missing.
    vectorstore: VectorStoreConfig
    llm_params: LLMParams
    chat_logging: ChatLoggingConfig
    splitter_options: SplitterOptions
    retriever: RetrieverConfig
    metadata: MetadataConfig
    api_config: APIConfig
114
+
115
+
116
class ConfigManager:
    """Loads, merges, and validates the base and project YAML configurations.

    The base config (``config_path``) holds generic settings; the project
    config (``project_config_path``) holds deployment-specific sections
    (``retriever``, ``metadata``, ``api_config``).
    """

    def __init__(self, config_path: str, project_config_path: str):
        self.config_path = config_path
        self.project_config_path = project_config_path
        self.config = self.load_config()
        self.validate_config()

    def load_config(self) -> Config:
        """Read both YAML files and build a validated ``Config``.

        Raises:
            FileNotFoundError: if either YAML file is missing.
            pydantic.ValidationError: if the merged data fails validation.
        """
        # `or {}` guards against yaml.safe_load returning None for an
        # empty file, which would make the merge below raise TypeError.
        with open(self.config_path, "r") as f:
            config_data = yaml.safe_load(f) or {}

        with open(self.project_config_path, "r") as f:
            project_config_data = yaml.safe_load(f) or {}

        # NOTE: shallow merge — a top-level key present in both files is
        # replaced wholesale by the project config (sections are not merged
        # recursively). The two files are expected to define disjoint
        # top-level sections.
        merged_config = {**config_data, **project_config_data}

        return Config(**merged_config)

    def get_config(self) -> "ConfigWrapper":
        """Return the loaded config wrapped for both attribute and item access.

        (Annotation fixed: this returns a ConfigWrapper, not a Config.)
        """
        return ConfigWrapper(self.config)

    def validate_config(self):
        """Placeholder for extra cross-field validation.

        Field-level validation already happens in ``Config(**merged_config)``
        via pydantic; nothing additional is checked here yet.
        """
        pass
154
+
155
+
156
class ConfigWrapper:
    """Thin adapter around a ``Config`` model that allows both attribute
    access (``cfg.llm_params``) and mapping-style access (``cfg["llm_params"]``),
    plus a plain-dict view via ``dict()``."""

    def __init__(self, config: Config):
        self._config = config

    def dict(self):
        # Plain-dict view of the wrapped pydantic model.
        return self._config.dict()

    def __getattr__(self, attr_name):
        # Only invoked for names not found on the wrapper itself;
        # forward them to the wrapped Config.
        return getattr(self._config, attr_name)

    def __getitem__(self, field):
        # Mapping-style lookup delegates to the model's attribute of the
        # same name.
        return getattr(self._config, field)
168
+
169
+
170
# Usage
# Module-level singleton: importing this module reads and validates both
# YAML files once; consumers do `from config.config_manager import config_manager`.
config_manager = ConfigManager(
    config_path="config/config.yml", project_config_path="config/project_config.yml"
)
# config = config_manager.get_config().dict()
{code/modules β†’ apps/chainlit_base}/config/project_config.yml RENAMED
@@ -4,4 +4,7 @@ retriever:
4
 
5
  metadata:
6
  metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
- slide_base_link: "https://dl4ds.github.io"
 
 
 
 
4
 
5
  metadata:
6
  metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
+ slide_base_link: "https://dl4ds.github.io"
8
+
9
+ api_config:
10
+ timeout: 60
apps/chainlit_base/config/prompts.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt templates for the AI Tutor, keyed first by model family
# ("openai" chat models vs. "tiny_llama" local GGUF), then by usage:
#   - rephrase_prompt: rewrites the student's question using chat history
#   - prompt_with_history: main tutoring prompt variants (by llm_style)
#   - prompt_no_history: tutoring prompt when use_history is disabled
# Placeholders filled at runtime: {chat_history}, {context}, {input}.
prompts = {
    "openai": {
        "rephrase_prompt": (
            "You are someone that rephrases statements. Rephrase the student's question to add context from their chat history if relevant, ensuring it remains from the student's point of view. "
            "Incorporate relevant details from the chat history to make the question clearer and more specific. "
            "Do not change the meaning of the original statement, and maintain the student's tone and perspective. "
            "If the question is conversational and doesn't require context, do not rephrase it. "
            "Example: If the student previously asked about backpropagation in the context of deep learning and now asks 'what is it', rephrase to 'What is backpropagation.'. "
            "Example: Do not rephrase if the user is asking something specific like 'cool, suggest a project with transformers to use as my final project' "
            "Chat history: \n{chat_history}\n"
            "Rephrase the following question only if necessary: '{input}'"
            "Rephrased Question:'"
        ),
        # Style-specific variants selected via llm_params.llm_style.
        "prompt_with_history": {
            "normal": (
                "You are an AI Tutor for the course DS598, taught by Prof. Thomas Gardos. Answer the user's question using the provided context. Only use the context if it is relevant. The context is ordered by relevance. "
                "If you don't know the answer, do your best without making things up. Keep the conversation flowing naturally. "
                "Use chat history and context as guides but avoid repeating past responses. Provide links from the source_file metadata. Use the source context that is most relevant. "
                "Render math equations in LaTeX format between $ or $$ signs, stick to the parameter and variable icons found in your context. Be sure to explain the parameters and variables in the equations."
                "Speak in a friendly and engaging manner, like talking to a friend. Avoid sounding repetitive or robotic.\n\n"
                "Do not get influenced by the style of conversation in the chat history. Follow the instructions given here."
                "Chat History:\n{chat_history}\n\n"
                "Context:\n{context}\n\n"
                "Answer the student's question below in a friendly, concise, and engaging manner. Use the context and history only if relevant, otherwise, engage in a free-flowing conversation.\n"
                "Student: {input}\n"
                "AI Tutor:"
            ),
            "eli5": (
                "You are an AI Tutor for the course DS598, taught by Prof. Thomas Gardos. Your job is to explain things in the simplest and most engaging way possible, just like the 'Explain Like I'm 5' (ELI5) concept."
                "If you don't know the answer, do your best without making things up. Keep your explanations straightforward and very easy to understand."
                "Use the chat history and context to help you, but avoid repeating past responses. Provide links from the source_file metadata when they're helpful."
                "Use very simple language and examples to explain any math equations, and put the equations in LaTeX format between $ or $$ signs."
                "Be friendly and engaging, like you're chatting with a young child who's curious and eager to learn. Avoid complex terms and jargon."
                "Include simple and clear examples wherever you can to make things easier to understand."
                "Do not get influenced by the style of conversation in the chat history. Follow the instructions given here."
                "Chat History:\n{chat_history}\n\n"
                "Context:\n{context}\n\n"
                "Answer the student's question below in a friendly, simple, and engaging way, just like the ELI5 concept. Use the context and history only if they're relevant, otherwise, just have a natural conversation."
                "Give a clear and detailed explanation with simple examples to make it easier to understand. Remember, your goal is to break down complex topics into very simple terms, just like ELI5."
                "Student: {input}\n"
                "AI Tutor:"
            ),
            "socratic": (
                "You are an AI Tutor for the course DS598, taught by Prof. Thomas Gardos. Engage the student in a Socratic dialogue to help them discover answers on their own. Use the provided context to guide your questioning."
                "If you don't know the answer, do your best without making things up. Keep the conversation engaging and inquisitive."
                "Use chat history and context as guides but avoid repeating past responses. Provide links from the source_file metadata when relevant. Use the source context that is most relevant."
                "Speak in a friendly and engaging manner, encouraging critical thinking and self-discovery."
                "Use questions to lead the student to explore the topic and uncover answers."
                "Chat History:\n{chat_history}\n\n"
                "Context:\n{context}\n\n"
                "Answer the student's question below by guiding them through a series of questions and insights that lead to deeper understanding. Use the context and history only if relevant, otherwise, engage in a free-flowing conversation."
                "Foster an inquisitive mindset and help the student discover answers through dialogue."
                "Student: {input}\n"
                "AI Tutor:"
            ),
        },
        "prompt_no_history": (
            "You are an AI Tutor for the course DS598, taught by Prof. Thomas Gardos. Answer the user's question using the provided context. Only use the context if it is relevant. The context is ordered by relevance. "
            "If you don't know the answer, do your best without making things up. Keep the conversation flowing naturally. "
            "Provide links from the source_file metadata. Use the source context that is most relevant. "
            "Speak in a friendly and engaging manner, like talking to a friend. Avoid sounding repetitive or robotic.\n\n"
            "Context:\n{context}\n\n"
            "Answer the student's question below in a friendly, concise, and engaging manner. Use the context and history only if relevant, otherwise, engage in a free-flowing conversation.\n"
            "Student: {input}\n"
            "AI Tutor:"
        ),
    },
    # Prompts for the local TinyLlama model (see LocalLLMParams).
    "tiny_llama": {
        "prompt_no_history": (
            "system\n"
            "Assistant is an intelligent chatbot designed to help students with questions regarding the course DS598, taught by Prof. Thomas Gardos. Answer the user's question using the provided context. Only use the context if it is relevant. The context is ordered by relevance.\n"
            "If you don't know the answer, do your best without making things up. Keep the conversation flowing naturally.\n"
            "Provide links from the source_file metadata. Use the source context that is most relevant.\n"
            "Speak in a friendly and engaging manner, like talking to a friend. Avoid sounding repetitive or robotic.\n"
            "\n\n"
            "user\n"
            "Context:\n{context}\n\n"
            "Question: {input}\n"
            "\n\n"
            "assistant"
        ),
        "prompt_with_history": (
            "system\n"
            "You are an AI Tutor for the course DS598, taught by Prof. Thomas Gardos. Answer the user's question using the provided context. Only use the context if it is relevant. The context is ordered by relevance. "
            "If you don't know the answer, do your best without making things up. Keep the conversation flowing naturally. "
            "Use chat history and context as guides but avoid repeating past responses. Provide links from the source_file metadata. Use the source context that is most relevant. "
            "Speak in a friendly and engaging manner, like talking to a friend. Avoid sounding repetitive or robotic.\n"
            "\n\n"
            "user\n"
            "Chat History:\n{chat_history}\n\n"
            "Context:\n{context}\n\n"
            "Question: {input}\n"
            "\n\n"
            "assistant"
        ),
    },
}
apps/chainlit_base/public/assets/images/avatars/ai-tutor.png ADDED
apps/chainlit_base/public/assets/images/avatars/ai_tutor.png ADDED
apps/chainlit_base/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg ADDED
apps/chainlit_base/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg ADDED
apps/chainlit_base/public/assets/images/starter_icons/alarmy-svgrepo-com.svg ADDED
apps/chainlit_base/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg ADDED
apps/chainlit_base/public/files/students_encrypted.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"7f1cacca66ee914ddde2ee20e0f2c96651d60cd8aabd310ef25a9e6d88f42df0": ["instructor", "bu"], "f74d264b6b5b2b4c10ce69e4ec16e869e01cb5eb668ed846aa8f6dae5c96cda0": ["admin", "instructor", "bu"], "53401356a874b1539775c73a8564d5e5f4f840441630c9cf649e16d201454f20": ["instructor", "bu"]}
{code/public β†’ apps/chainlit_base/public/files}/test.css RENAMED
@@ -2,20 +2,23 @@ a[href*='https://github.com/Chainlit/chainlit'] {
2
  visibility: hidden;
3
  }
4
 
5
-
 
 
 
 
 
6
  /* Target the container of the image and set a custom background image */
7
  .MuiAvatar-root.MuiAvatar-circular.css-m2icte {
8
- background-image: url('/public/avatars/ai-tutor.png'); /* Replace with your custom image URL */
9
  background-size: cover; /* Ensure the image covers the entire container */
10
  background-position: center; /* Center the image */
11
  width: 100px; /* Ensure the dimensions match the original */
12
  height: 100px; /* Ensure the dimensions match the original */
13
  border-radius: 50%; /* Maintain circular shape */
14
  }
15
-
16
-
17
  .MuiAvatar-root.MuiAvatar-circular.css-v72an7 {
18
- background-image: url('/public/avatars/ai-tutor.png'); /* Replace with your custom image URL */
19
  background-size: cover; /* Ensure the image covers the entire container */
20
  background-position: center; /* Center the image */
21
  width: 40px; /* Ensure the dimensions match the original */
 
2
  visibility: hidden;
3
  }
4
 
5
+ /* TODO: Default image is still shown in the starter questions */
6
+ /* Hide the default avatar image */
7
+ .MuiAvatar-root img.MuiAvatar-img {
8
+ display: none;
9
+ }
10
+
11
  /* Target the container of the image and set a custom background image */
12
  .MuiAvatar-root.MuiAvatar-circular.css-m2icte {
13
+ background-image: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with your custom image URL */
14
  background-size: cover; /* Ensure the image covers the entire container */
15
  background-position: center; /* Center the image */
16
  width: 100px; /* Ensure the dimensions match the original */
17
  height: 100px; /* Ensure the dimensions match the original */
18
  border-radius: 50%; /* Maintain circular shape */
19
  }
 
 
20
  .MuiAvatar-root.MuiAvatar-circular.css-v72an7 {
21
+ background-image: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with your custom image URL */
22
  background-size: cover; /* Ensure the image covers the entire container */
23
  background-position: center; /* Center the image */
24
  width: 40px; /* Ensure the dimensions match the original */
apps/chainlit_base/public/logo_dark.png ADDED