mpav commited on
Commit
2e4622d
·
1 Parent(s): 32d116c
.chainlit/config.toml CHANGED
@@ -1,6 +1,6 @@
1
  [project]
2
  # Whether to enable telemetry (default: true). No personal data is collected.
3
- enable_telemetry = true
4
 
5
  # List of environment variables to be provided by each user to use the app.
6
  user_env = []
@@ -59,6 +59,9 @@ hide_cot = false
59
  # The CSS file can be served from the public directory or via an external link.
60
  custom_css = "/public/stylesheet.css"
61
 
 
 
 
62
  # Override default MUI light theme. (Check theme.ts)
63
  [UI.theme.light]
64
  #background = "#008080"
 
1
  [project]
2
  # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = false
4
 
5
  # List of environment variables to be provided by each user to use the app.
6
  user_env = []
 
59
  # The CSS file can be served from the public directory or via an external link.
60
  custom_css = "/public/stylesheet.css"
61
 
62
+ [UI.theme]
63
+ default = "light"
64
+
65
  # Override default MUI light theme. (Check theme.ts)
66
  [UI.theme.light]
67
  #background = "#008080"
.chainlit/translations/en-US.json ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "components": {
3
+ "atoms": {
4
+ "buttons": {
5
+ "userButton": {
6
+ "menu": {
7
+ "settings": "Settings",
8
+ "settingsKey": "S",
9
+ "APIKeys": "API Keys",
10
+ "logout": "Logout"
11
+ }
12
+ }
13
+ }
14
+ },
15
+ "molecules": {
16
+ "newChatButton": {
17
+ "newChat": "New Chat"
18
+ },
19
+ "tasklist": {
20
+ "TaskList": {
21
+ "title": "\ud83d\uddd2\ufe0f Task List",
22
+ "loading": "Loading...",
23
+ "error": "An error occurred"
24
+ }
25
+ },
26
+ "attachments": {
27
+ "cancelUpload": "Cancel upload",
28
+ "removeAttachment": "Remove attachment"
29
+ },
30
+ "newChatDialog": {
31
+ "createNewChat": "Create new chat?",
32
+ "clearChat": "This will clear the current messages and start a new chat.",
33
+ "cancel": "Cancel",
34
+ "confirm": "Confirm"
35
+ },
36
+ "settingsModal": {
37
+ "settings": "Settings",
38
+ "expandMessages": "Expand Messages",
39
+ "hideChainOfThought": "Hide Chain of Thought",
40
+ "darkMode": "Dark Mode"
41
+ },
42
+ "detailsButton": {
43
+ "using": "Using",
44
+ "used": "Used"
45
+ },
46
+ "auth": {
47
+ "authLogin": {
48
+ "title": "Login to access the app.",
49
+ "form": {
50
+ "email": "Email address",
51
+ "password": "Password",
52
+ "noAccount": "Don't have an account?",
53
+ "alreadyHaveAccount": "Already have an account?",
54
+ "signup": "Sign Up",
55
+ "signin": "Sign In",
56
+ "or": "OR",
57
+ "continue": "Continue",
58
+ "forgotPassword": "Forgot password?",
59
+ "passwordMustContain": "Your password must contain:",
60
+ "emailRequired": "email is a required field",
61
+ "passwordRequired": "password is a required field"
62
+ },
63
+ "error": {
64
+ "default": "Unable to sign in.",
65
+ "signin": "Try signing in with a different account.",
66
+ "oauthsignin": "Try signing in with a different account.",
67
+ "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
68
+ "oauthcallbackerror": "Try signing in with a different account.",
69
+ "oauthcreateaccount": "Try signing in with a different account.",
70
+ "emailcreateaccount": "Try signing in with a different account.",
71
+ "callback": "Try signing in with a different account.",
72
+ "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
73
+ "emailsignin": "The e-mail could not be sent.",
74
+ "emailverify": "Please verify your email, a new email has been sent.",
75
+ "credentialssignin": "Sign in failed. Check the details you provided are correct.",
76
+ "sessionrequired": "Please sign in to access this page."
77
+ }
78
+ },
79
+ "authVerifyEmail": {
80
+ "almostThere": "You're almost there! We've sent an email to ",
81
+ "verifyEmailLink": "Please click on the link in that email to complete your signup.",
82
+ "didNotReceive": "Can't find the email?",
83
+ "resendEmail": "Resend email",
84
+ "goBack": "Go Back",
85
+ "emailSent": "Email sent successfully.",
86
+ "verifyEmail": "Verify your email address"
87
+ },
88
+ "providerButton": {
89
+ "continue": "Continue with {{provider}}",
90
+ "signup": "Sign up with {{provider}}"
91
+ },
92
+ "authResetPassword": {
93
+ "newPasswordRequired": "New password is a required field",
94
+ "passwordsMustMatch": "Passwords must match",
95
+ "confirmPasswordRequired": "Confirm password is a required field",
96
+ "newPassword": "New password",
97
+ "confirmPassword": "Confirm password",
98
+ "resetPassword": "Reset Password"
99
+ },
100
+ "authForgotPassword": {
101
+ "email": "Email address",
102
+ "emailRequired": "email is a required field",
103
+ "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
104
+ "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
105
+ "resendEmail": "Resend email",
106
+ "continue": "Continue",
107
+ "goBack": "Go Back"
108
+ }
109
+ }
110
+ },
111
+ "organisms": {
112
+ "chat": {
113
+ "history": {
114
+ "index": {
115
+ "showHistory": "Show history",
116
+ "lastInputs": "Last Inputs",
117
+ "noInputs": "Such empty...",
118
+ "loading": "Loading..."
119
+ }
120
+ },
121
+ "inputBox": {
122
+ "input": {
123
+ "placeholder": "Type your message here..."
124
+ },
125
+ "speechButton": {
126
+ "start": "Start recording",
127
+ "stop": "Stop recording"
128
+ },
129
+ "SubmitButton": {
130
+ "sendMessage": "Send message",
131
+ "stopTask": "Stop Task"
132
+ },
133
+ "UploadButton": {
134
+ "attachFiles": "Attach files"
135
+ },
136
+ "waterMark": {
137
+ "text": "Built with"
138
+ }
139
+ },
140
+ "Messages": {
141
+ "index": {
142
+ "running": "Running",
143
+ "executedSuccessfully": "executed successfully",
144
+ "failed": "failed",
145
+ "feedbackUpdated": "Feedback updated",
146
+ "updating": "Updating"
147
+ }
148
+ },
149
+ "dropScreen": {
150
+ "dropYourFilesHere": "Drop your files here"
151
+ },
152
+ "index": {
153
+ "failedToUpload": "Failed to upload",
154
+ "cancelledUploadOf": "Cancelled upload of",
155
+ "couldNotReachServer": "Could not reach the server",
156
+ "continuingChat": "Continuing previous chat"
157
+ },
158
+ "settings": {
159
+ "settingsPanel": "Settings panel",
160
+ "reset": "Reset",
161
+ "cancel": "Cancel",
162
+ "confirm": "Confirm"
163
+ }
164
+ },
165
+ "threadHistory": {
166
+ "sidebar": {
167
+ "filters": {
168
+ "FeedbackSelect": {
169
+ "feedbackAll": "Feedback: All",
170
+ "feedbackPositive": "Feedback: Positive",
171
+ "feedbackNegative": "Feedback: Negative"
172
+ },
173
+ "SearchBar": {
174
+ "search": "Search"
175
+ }
176
+ },
177
+ "DeleteThreadButton": {
178
+ "confirmMessage": "This will delete the thread as well as it's messages and elements.",
179
+ "cancel": "Cancel",
180
+ "confirm": "Confirm",
181
+ "deletingChat": "Deleting chat",
182
+ "chatDeleted": "Chat deleted"
183
+ },
184
+ "index": {
185
+ "pastChats": "Past Chats"
186
+ },
187
+ "ThreadList": {
188
+ "empty": "Empty...",
189
+ "today": "Today",
190
+ "yesterday": "Yesterday",
191
+ "previous7days": "Previous 7 days",
192
+ "previous30days": "Previous 30 days"
193
+ },
194
+ "TriggerButton": {
195
+ "closeSidebar": "Close sidebar",
196
+ "openSidebar": "Open sidebar"
197
+ }
198
+ },
199
+ "Thread": {
200
+ "backToChat": "Go back to chat",
201
+ "chatCreatedOn": "This chat was created on"
202
+ }
203
+ },
204
+ "header": {
205
+ "chat": "Chat",
206
+ "readme": "Readme"
207
+ }
208
+ }
209
+ },
210
+ "hooks": {
211
+ "useLLMProviders": {
212
+ "failedToFetchProviders": "Failed to fetch providers:"
213
+ }
214
+ },
215
+ "pages": {
216
+ "Design": {},
217
+ "Env": {
218
+ "savedSuccessfully": "Saved successfully",
219
+ "requiredApiKeys": "Required API Keys",
220
+ "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
221
+ },
222
+ "Page": {
223
+ "notPartOfProject": "You are not part of this project."
224
+ },
225
+ "ResumeButton": {
226
+ "resumeChat": "Resume Chat"
227
+ }
228
+ }
229
+ }
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
__pycache__/starters.cpython-311.pyc ADDED
Binary file (1.15 kB). View file
 
app.py CHANGED
@@ -1,21 +1,16 @@
1
  # You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
2
 
3
- import os
4
- from openai import AsyncOpenAI # importing openai for API usage
5
  import chainlit as cl # importing chainlit for our app
6
- from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
7
- #from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
8
  from dotenv import load_dotenv
9
- from chainlit.types import AskFileResponse
10
 
11
- import asyncio
12
  from langchain.document_loaders import PyMuPDFLoader, PyPDFLoader
13
  from langchain_openai import ChatOpenAI
14
  from langchain_core.prompts import ChatPromptTemplate
15
  import tiktoken
16
  from langchain.text_splitter import RecursiveCharacterTextSplitter
17
  from langchain_openai.embeddings import OpenAIEmbeddings
18
- from utils.custom_retriver import CustomQDrant, CustomVectorStoreRetriever
 
19
 
20
 
21
  load_dotenv()
@@ -30,14 +25,16 @@ CONTEXT:
30
  QUERY:
31
  {question}
32
 
33
- Answer questions only based on provided context and not your previous knowledge.
34
  In your answer never mention phrases like Based on provided context, From the context etc.
35
- If you don't know the answer say I don't know!
 
 
36
  """
37
 
38
  data_path = "data/airbnb_midterm.pdf"
39
  docs = PyMuPDFLoader(data_path).load()
40
- openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo") #gpt-4o
41
 
42
  def tiktoken_len(text):
43
  tokens = tiktoken.encoding_for_model("gpt-4o").encode(
@@ -46,7 +43,7 @@ def tiktoken_len(text):
46
  return len(tokens)
47
 
48
  text_splitter = RecursiveCharacterTextSplitter(
49
- chunk_size = 500,
50
  chunk_overlap = 10,
51
  length_function = tiktoken_len,
52
  )
@@ -100,7 +97,7 @@ async def start_chat():
100
 
101
 
102
 
103
- cl.user_session.set("chain", retrieval_augmented_qa_chain)
104
 
105
 
106
 
@@ -128,7 +125,7 @@ async def main(message: cl.Message):
128
  source_names = [text_el.name for text_el in text_elements]
129
 
130
  if source_names:
131
- resp_msg += f"\nSources: {', '.join(source_names)}"
132
  else:
133
  resp_msg += "\nNo sources found"
134
 
 
1
  # You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
2
 
 
 
3
  import chainlit as cl # importing chainlit for our app
 
 
4
  from dotenv import load_dotenv
 
5
 
 
6
  from langchain.document_loaders import PyMuPDFLoader, PyPDFLoader
7
  from langchain_openai import ChatOpenAI
8
  from langchain_core.prompts import ChatPromptTemplate
9
  import tiktoken
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_openai.embeddings import OpenAIEmbeddings
12
+ from utils.custom_retriver import CustomQDrant
13
+ from starters import set_starters
14
 
15
 
16
  load_dotenv()
 
25
  QUERY:
26
  {question}
27
 
28
+ Answer questions first based on provided context and if you can't find answer in provided context, use your previous knowledge.
29
  In your answer never mention phrases like Based on provided context, From the context etc.
30
+
31
+ At the end of each answer add CONTEXT CONFIDENCE tag -> answer vs. context similarity score -> faithfulness - answer in percent e.g. 85%.
32
+ Also add CONTEXT vs PRIOR tag: break answer to what you find in provided context and what you build from your prior knowledge.
33
  """
34
 
35
  data_path = "data/airbnb_midterm.pdf"
36
  docs = PyMuPDFLoader(data_path).load()
37
+ openai_chat_model = ChatOpenAI(model="gpt-4o") #gpt-4o
38
 
39
  def tiktoken_len(text):
40
  tokens = tiktoken.encoding_for_model("gpt-4o").encode(
 
43
  return len(tokens)
44
 
45
  text_splitter = RecursiveCharacterTextSplitter(
46
+ chunk_size = 200,
47
  chunk_overlap = 10,
48
  length_function = tiktoken_len,
49
  )
 
97
 
98
 
99
 
100
+ cl.user_session.set("chain", retrieval_augmented_qa_chain, )
101
 
102
 
103
 
 
125
  source_names = [text_el.name for text_el in text_elements]
126
 
127
  if source_names:
128
+ resp_msg += f"\n\nSources: {', '.join(source_names)}"
129
  else:
130
  resp_msg += "\nNo sources found"
131
 
chainlit.md CHANGED
@@ -2,8 +2,86 @@
2
 
3
  This app is developed for **AI Engineering bootcamp!**
4
 
5
- ## Useful Links 🔗
6
 
7
- - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8
- - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
2
 
3
  This app is developed for **AI Engineering bootcamp!**
4
 
5
+ ### Prompt approach:
6
 
7
+ **Classic:**
8
+
9
+ ```
10
+ CONTEXT:
11
+ {{context}}
12
+
13
+ QUERY:
14
+ {{question}}
15
+
16
+ Answer questions only based on provided context and not your previous knowledge.
17
+ In your answer never mention phrases like Based on provided context, From the context etc.
18
+ If you don't know the answer say I don't know!
19
+ ```
20
+ ### My idea was to move away from this 101 RAG prompt apporach and have LLM give information regarding answer construction in a way - context vs. prior knowledge!
21
+
22
+ This is still not the best approach, and I would prefer to use more advanced evaluation tehnics, but it helps us understand hallucinations better.
23
+ Additionally, 4o model answers can understand this type of prompt, while GPT 3.5 gets confused between prior knowlege and provided context.
24
+
25
+ **My test approach:**
26
+
27
+ ```
28
+ CONTEXT:
29
+ {{context}}
30
+
31
+ QUERY:
32
+ {{question}}
33
+
34
+ Answer questions only based on provided context and not your previous knowledge.
35
+ In your answer never mention phrases like Based on provided context, From the context etc.
36
+ If you don't know the answer say I don't know!
37
+
38
+ ```
39
+
40
+ ## Question answers 🔗
41
+
42
+ 1. **"What is Airbnb's 'Description of Business'?"**
43
+
44
+ Airbnb, Inc. was incorporated in Delaware in June 2008 and is headquartered in San Francisco, California. The company operates a global platform for unique stays and experiences. Its marketplace model connects hosts and guests (collectively referred to as “customers”) online or through mobile devices to book spaces and experiences around the world.
45
+
46
+ CONTEXT CONFIDENCE: 100%
47
+ CONTEXT vs PRIOR: The answer is entirely based on the provided context.
48
+
49
+ Sources: source_0 (scr: 0.58), source_1 (scr: 0.47), source_2 (scr: 0.45), source_3 (scr: 0.45)
50
+
51
+ 2. **"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"**
52
+
53
+ The total value of 'Cash and cash equivalents' as of December 31, 2023, was $12,667 million.
54
+
55
+ CONTEXT CONFIDENCE: 100%
56
+ CONTEXT vs PRIOR:
57
+ - From context: The specific figure of $12,667 million for 'Cash and cash equivalents' as of December 31, 2023.
58
+ - Prior knowledge: None used.
59
+
60
+ Sources: source_0 (scr: 0.57), source_1 (scr: 0.54), source_2 (scr: 0.54), source_3 (scr: 0.53)
61
+
62
+
63
+ 3. **"What is the 'maximum number of shares to be sold under the 10b5-1 Trading plan' by Brian Chesky?"**
64
+
65
+ The maximum number of shares to be sold under the 10b5-1 Trading Plan by Brian Chesky is 1,146,000.
66
+
67
+ CONTEXT CONFIDENCE: 100%
68
+ CONTEXT vs PRIOR:
69
+ - Context: The maximum number of shares to be sold under the 10b5-1 Trading Plan by Brian Chesky.
70
+ - Prior: None.
71
+
72
+ Sources: source_0 (scr: 0.64), source_1 (scr: 0.47), source_2 (scr: 0.45), source_3 (scr: 0.44)
73
+
74
+
75
+ ## Example: multiple context question
76
+
77
+ 4. **In what club Luka Modrić plays and who is Brian Chesky?**
78
+
79
+ Luka Modrić plays for Real Madrid, a professional football club based in Madrid, Spain. Brian Chesky is the Chief Executive Officer (CEO) of Airbnb, Inc.
80
+
81
+ CONTEXT CONFIDENCE: 100% for Brian Chesky, 0% for Luka Modrić.
82
+ CONTEXT vs PRIOR:
83
+ - **Context**: Brian Chesky is identified as the CEO of Airbnb, Inc.
84
+ - **Prior Knowledge**: Luka Modrić plays for Real Madrid.
85
+
86
+ Sources: source_0 (scr: 0.36), source_1 (scr: 0.32), source_2 (scr: 0.32), source_3 (scr: 0.32)
87
 
public/airbnb.svg ADDED
public/barfin.svg ADDED
public/fund.svg ADDED
public/light.svg ADDED
public/logo_dark.png CHANGED
public/soccer.svg ADDED
public/stylesheet.css CHANGED
@@ -2,6 +2,11 @@ img {
2
  max-height: 70px !important;
3
  }
4
 
 
 
 
 
 
5
  /* Hide the original text */
6
  .css-pcmo6i {
7
  visibility: hidden;
@@ -16,6 +21,10 @@ img {
16
  display: block;
17
  }
18
 
 
 
 
 
19
  img[alt="watermark"] {
20
  visibility: hidden;
21
  }
 
2
  max-height: 70px !important;
3
  }
4
 
5
+ .css-8mm1u0 {
6
+ background-color: #288b8f !important;
7
+ color: white;
8
+ }
9
+
10
  /* Hide the original text */
11
  .css-pcmo6i {
12
  visibility: hidden;
 
21
  display: block;
22
  }
23
 
24
+ svg[viewBox="0 0 1143 266"] {
25
+ visibility: hidden;
26
+ }
27
+
28
  img[alt="watermark"] {
29
  visibility: hidden;
30
  }
starters.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chainlit as cl
2
+
3
+ @cl.set_starters
4
+ async def set_starters():
5
+ return [
6
+ cl.Starter(
7
+ label="AirBnB info",
8
+ message="What is Airbnb's 'Description of Business'?",
9
+ icon="/public/airbnb.svg",
10
+ ),
11
+
12
+ cl.Starter(
13
+ label="Cash and cash equivalents",
14
+ message="What was the total value of 'Cash and cash equivalents' as of December 31, 2023?",
15
+ icon="/public/barfin.svg",
16
+ ),
17
+ cl.Starter(
18
+ label="shares by Brian Chesky",
19
+ message="What is the 'maximum number of shares to be sold under the 10b5-1 Trading plan' by Brian Chesky?",
20
+ icon="/public/light.svg",
21
+ ),
22
+
23
+ cl.Starter(
24
+ label="Luka vs. Brian",
25
+ message="In what club Luka Modrić plays and who is Brian Chesky?",
26
+ icon="/public/soccer.svg",
27
+ )
28
+
29
+ ]
30
+ # ...
utils/__pycache__/custom_retriver.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/custom_retriver.cpython-311.pyc and b/utils/__pycache__/custom_retriver.cpython-311.pyc differ