Spaces:

defsound
/

midterm-airbnb

Runtime error

App Files Files Community

mpav commited on Jun 24, 2024

Commit

2e4622d

1 Parent(s): 32d116c

pobjeda

Browse files

Files changed (15) hide show

.chainlit/config.toml +4 -1
.chainlit/translations/en-US.json +229 -0
__pycache__/app.cpython-311.pyc +0 -0
__pycache__/starters.cpython-311.pyc +0 -0
app.py +10 -13
chainlit.md +81 -3
public/airbnb.svg +1 -0
public/barfin.svg +12 -0
public/fund.svg +3 -0
public/light.svg +3 -0
public/logo_dark.png +0 -0
public/soccer.svg +86 -0
public/stylesheet.css +9 -0
starters.py +30 -0
utils/__pycache__/custom_retriver.cpython-311.pyc +0 -0

.chainlit/config.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 # Whether to enable telemetry (default: true). No personal data is collected.
-enable_telemetry = true
 # List of environment variables to be provided by each user to use the app.
 user_env = []
@@ -59,6 +59,9 @@ hide_cot = false
 # The CSS file can be served from the public directory or via an external link.
 custom_css = "/public/stylesheet.css"
 # Override default MUI light theme. (Check theme.ts)
 [UI.theme.light]
     #background = "#008080"

 [project]
 # Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = false
 # List of environment variables to be provided by each user to use the app.
 user_env = []
 # The CSS file can be served from the public directory or via an external link.
 custom_css = "/public/stylesheet.css"
+[UI.theme]
+    default = "light"
 # Override default MUI light theme. (Check theme.ts)
 [UI.theme.light]
     #background = "#008080"

.chainlit/translations/en-US.json ADDED Viewed

	@@ -0,0 +1,229 @@

+{
+    "components": {
+        "atoms": {
+            "buttons": {
+                "userButton": {
+                    "menu": {
+                        "settings": "Settings",
+                        "settingsKey": "S",
+                        "APIKeys": "API Keys",
+                        "logout": "Logout"
+                    }
+                }
+            }
+        },
+        "molecules": {
+            "newChatButton": {
+                "newChat": "New Chat"
+            },
+            "tasklist": {
+                "TaskList": {
+                    "title": "\ud83d\uddd2\ufe0f Task List",
+                    "loading": "Loading...",
+                    "error": "An error occurred"
+                }
+            },
+            "attachments": {
+                "cancelUpload": "Cancel upload",
+                "removeAttachment": "Remove attachment"
+            },
+            "newChatDialog": {
+                "createNewChat": "Create new chat?",
+                "clearChat": "This will clear the current messages and start a new chat.",
+                "cancel": "Cancel",
+                "confirm": "Confirm"
+            },
+            "settingsModal": {
+                "settings": "Settings",
+                "expandMessages": "Expand Messages",
+                "hideChainOfThought": "Hide Chain of Thought",
+                "darkMode": "Dark Mode"
+            },
+            "detailsButton": {
+                "using": "Using",
+                "used": "Used"
+            },
+            "auth": {
+                "authLogin": {
+                    "title": "Login to access the app.",
+                    "form": {
+                        "email": "Email address",
+                        "password": "Password",
+                        "noAccount": "Don't have an account?",
+                        "alreadyHaveAccount": "Already have an account?",
+                        "signup": "Sign Up",
+                        "signin": "Sign In",
+                        "or": "OR",
+                        "continue": "Continue",
+                        "forgotPassword": "Forgot password?",
+                        "passwordMustContain": "Your password must contain:",
+                        "emailRequired": "email is a required field",
+                        "passwordRequired": "password is a required field"
+                    },
+                    "error": {
+                        "default": "Unable to sign in.",
+                        "signin": "Try signing in with a different account.",
+                        "oauthsignin": "Try signing in with a different account.",
+                        "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
+                        "oauthcallbackerror": "Try signing in with a different account.",
+                        "oauthcreateaccount": "Try signing in with a different account.",
+                        "emailcreateaccount": "Try signing in with a different account.",
+                        "callback": "Try signing in with a different account.",
+                        "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
+                        "emailsignin": "The e-mail could not be sent.",
+                        "emailverify": "Please verify your email, a new email has been sent.",
+                        "credentialssignin": "Sign in failed. Check the details you provided are correct.",
+                        "sessionrequired": "Please sign in to access this page."
+                    }
+                },
+                "authVerifyEmail": {
+                    "almostThere": "You're almost there! We've sent an email to ",
+                    "verifyEmailLink": "Please click on the link in that email to complete your signup.",
+                    "didNotReceive": "Can't find the email?",
+                    "resendEmail": "Resend email",
+                    "goBack": "Go Back",
+                    "emailSent": "Email sent successfully.",
+                    "verifyEmail": "Verify your email address"
+                },
+                "providerButton": {
+                    "continue": "Continue with {{provider}}",
+                    "signup": "Sign up with {{provider}}"
+                },
+                "authResetPassword": {
+                    "newPasswordRequired": "New password is a required field",
+                    "passwordsMustMatch": "Passwords must match",
+                    "confirmPasswordRequired": "Confirm password is a required field",
+                    "newPassword": "New password",
+                    "confirmPassword": "Confirm password",
+                    "resetPassword": "Reset Password"
+                },
+                "authForgotPassword": {
+                    "email": "Email address",
+                    "emailRequired": "email is a required field",
+                    "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
+                    "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
+                    "resendEmail": "Resend email",
+                    "continue": "Continue",
+                    "goBack": "Go Back"
+                }
+            }
+        },
+        "organisms": {
+            "chat": {
+                "history": {
+                    "index": {
+                        "showHistory": "Show history",
+                        "lastInputs": "Last Inputs",
+                        "noInputs": "Such empty...",
+                        "loading": "Loading..."
+                    }
+                },
+                "inputBox": {
+                    "input": {
+                        "placeholder": "Type your message here..."
+                    },
+                    "speechButton": {
+                        "start": "Start recording",
+                        "stop": "Stop recording"
+                    },
+                    "SubmitButton": {
+                        "sendMessage": "Send message",
+                        "stopTask": "Stop Task"
+                    },
+                    "UploadButton": {
+                        "attachFiles": "Attach files"
+                    },
+                    "waterMark": {
+                        "text": "Built with"
+                    }
+                },
+                "Messages": {
+                    "index": {
+                        "running": "Running",
+                        "executedSuccessfully": "executed successfully",
+                        "failed": "failed",
+                        "feedbackUpdated": "Feedback updated",
+                        "updating": "Updating"
+                    }
+                },
+                "dropScreen": {
+                    "dropYourFilesHere": "Drop your files here"
+                },
+                "index": {
+                    "failedToUpload": "Failed to upload",
+                    "cancelledUploadOf": "Cancelled upload of",
+                    "couldNotReachServer": "Could not reach the server",
+                    "continuingChat": "Continuing previous chat"
+                },
+                "settings": {
+                    "settingsPanel": "Settings panel",
+                    "reset": "Reset",
+                    "cancel": "Cancel",
+                    "confirm": "Confirm"
+                }
+            },
+            "threadHistory": {
+                "sidebar": {
+                    "filters": {
+                        "FeedbackSelect": {
+                            "feedbackAll": "Feedback: All",
+                            "feedbackPositive": "Feedback: Positive",
+                            "feedbackNegative": "Feedback: Negative"
+                        },
+                        "SearchBar": {
+                            "search": "Search"
+                        }
+                    },
+                    "DeleteThreadButton": {
+                        "confirmMessage": "This will delete the thread as well as it's messages and elements.",
+                        "cancel": "Cancel",
+                        "confirm": "Confirm",
+                        "deletingChat": "Deleting chat",
+                        "chatDeleted": "Chat deleted"
+                    },
+                    "index": {
+                        "pastChats": "Past Chats"
+                    },
+                    "ThreadList": {
+                        "empty": "Empty...",
+                        "today": "Today",
+                        "yesterday": "Yesterday",
+                        "previous7days": "Previous 7 days",
+                        "previous30days": "Previous 30 days"
+                    },
+                    "TriggerButton": {
+                        "closeSidebar": "Close sidebar",
+                        "openSidebar": "Open sidebar"
+                    }
+                },
+                "Thread": {
+                    "backToChat": "Go back to chat",
+                    "chatCreatedOn": "This chat was created on"
+                }
+            },
+            "header": {
+                "chat": "Chat",
+                "readme": "Readme"
+            }
+        }
+    },
+    "hooks": {
+        "useLLMProviders": {
+            "failedToFetchProviders": "Failed to fetch providers:"
+        }
+    },
+    "pages": {
+        "Design": {},
+        "Env": {
+            "savedSuccessfully": "Saved successfully",
+            "requiredApiKeys": "Required API Keys",
+            "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
+        },
+        "Page": {
+            "notPartOfProject": "You are not part of this project."
+        },
+        "ResumeButton": {
+            "resumeChat": "Resume Chat"
+        }
+    }
+}

__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ

__pycache__/starters.cpython-311.pyc ADDED Viewed

Binary file (1.15 kB). View file

app.py CHANGED Viewed

@@ -1,21 +1,16 @@
 # You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
-import os
-from openai import AsyncOpenAI  # importing openai for API usage
 import chainlit as cl  # importing chainlit for our app
-from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
-#from chainlit.playground.providers import ChatOpenAI   # importing ChatOpenAI tools
 from dotenv import load_dotenv
-from chainlit.types import AskFileResponse
-import asyncio
 from langchain.document_loaders import PyMuPDFLoader, PyPDFLoader
 from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 import tiktoken
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_openai.embeddings import OpenAIEmbeddings
-from utils.custom_retriver import CustomQDrant, CustomVectorStoreRetriever
 load_dotenv()
@@ -30,14 +25,16 @@ CONTEXT:
 QUERY:
 {question}
-Answer questions only based on provided context and not your previous knowledge.
 In your answer never mention phrases like Based on provided context, From the context etc.
-If you don't know the answer say I don't know!
 """
 data_path = "data/airbnb_midterm.pdf"
 docs = PyMuPDFLoader(data_path).load()
-openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo") #gpt-4o
 def tiktoken_len(text):
     tokens = tiktoken.encoding_for_model("gpt-4o").encode(
@@ -46,7 +43,7 @@ def tiktoken_len(text):
     return len(tokens)
 text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size = 500,
     chunk_overlap = 10,
     length_function = tiktoken_len,
 )
@@ -100,7 +97,7 @@ async def start_chat():
-    cl.user_session.set("chain", retrieval_augmented_qa_chain)
@@ -128,7 +125,7 @@ async def main(message: cl.Message):
         source_names = [text_el.name for text_el in text_elements]
     if source_names:
-        resp_msg += f"\nSources: {', '.join(source_names)}"
     else:
         resp_msg += "\nNo sources found"

 # You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
 import chainlit as cl  # importing chainlit for our app
 from dotenv import load_dotenv
 from langchain.document_loaders import PyMuPDFLoader, PyPDFLoader
 from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 import tiktoken
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_openai.embeddings import OpenAIEmbeddings
+from utils.custom_retriver import CustomQDrant
+from starters import set_starters
 load_dotenv()
 QUERY:
 {question}
+Answer questions first based on provided context and if you can't find answer in provided context, use your previous knowledge.
 In your answer never mention phrases like Based on provided context, From the context etc.
+At the end of each answer add CONTEXT CONFIDENCE tag -> answer vs. context similarity score -> faithfulness - answer in percent e.g. 85%.
+Also add CONTEXT vs PRIOR tag: break answer to what you find in provided context and what you build from your prior knowledge.
 """
 data_path = "data/airbnb_midterm.pdf"
 docs = PyMuPDFLoader(data_path).load()
+openai_chat_model = ChatOpenAI(model="gpt-4o") #gpt-4o
 def tiktoken_len(text):
     tokens = tiktoken.encoding_for_model("gpt-4o").encode(
     return len(tokens)
 text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size = 200,
     chunk_overlap = 10,
     length_function = tiktoken_len,
 )
+    cl.user_session.set("chain", retrieval_augmented_qa_chain, )
         source_names = [text_el.name for text_el in text_elements]
     if source_names:
+        resp_msg += f"\n\nSources: {', '.join(source_names)}"
     else:
         resp_msg += "\nNo sources found"

chainlit.md CHANGED Viewed

@@ -2,8 +2,86 @@
 This app is developed for **AI Engineering bootcamp!**
-## Useful Links 🔗
-- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
-- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬

 This app is developed for **AI Engineering bootcamp!**
+### Prompt approach:
+**Classic:**
+```
+CONTEXT:
+{{context}}
+QUERY:
+{{question}}
+Answer questions only based on provided context and not your previous knowledge.
+In your answer never mention phrases like Based on provided context, From the context etc.
+If you don't know the answer say I don't know!
+```
+### My idea was to move away from this 101 RAG prompt apporach and have LLM give information regarding answer construction in a way - context vs. prior knowledge!
+This is still not the best approach, and I would prefer to use more advanced evaluation tehnics, but it helps us understand hallucinations better.
+Additionally, 4o model answers can understand this type of prompt, while GPT 3.5 gets confused between prior knowlege and provided context.
+**My test approach:**
+```
+CONTEXT:
+{{context}}
+QUERY:
+{{question}}
+Answer questions only based on provided context and not your previous knowledge.
+In your answer never mention phrases like Based on provided context, From the context etc.
+If you don't know the answer say I don't know!
+```
+## Question answers 🔗
+1.  **"What is Airbnb's 'Description of Business'?"**
+    Airbnb, Inc. was incorporated in Delaware in June 2008 and is headquartered in San Francisco, California. The company operates a global platform for unique stays and experiences. Its marketplace model connects hosts and guests (collectively referred to as “customers”) online or through mobile devices to book spaces and experiences around the world.
+    CONTEXT CONFIDENCE: 100%
+    CONTEXT vs PRIOR: The answer is entirely based on the provided context.
+    Sources: source_0 (scr: 0.58), source_1 (scr: 0.47), source_2 (scr: 0.45), source_3 (scr: 0.45)
+2.  **"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"**
+    The total value of 'Cash and cash equivalents' as of December 31, 2023, was $12,667 million.
+    CONTEXT CONFIDENCE: 100%
+    CONTEXT vs PRIOR:
+    - From context: The specific figure of $12,667 million for 'Cash and cash equivalents' as of December 31, 2023.
+    - Prior knowledge: None used.
+    Sources: source_0 (scr: 0.57), source_1 (scr: 0.54), source_2 (scr: 0.54), source_3 (scr: 0.53)
+3.  **"What is the 'maximum number of shares to be sold under the 10b5-1 Trading plan' by Brian Chesky?"**
+    The maximum number of shares to be sold under the 10b5-1 Trading Plan by Brian Chesky is 1,146,000.
+    CONTEXT CONFIDENCE: 100%
+    CONTEXT vs PRIOR:
+    - Context: The maximum number of shares to be sold under the 10b5-1 Trading Plan by Brian Chesky.
+    - Prior: None.
+    Sources: source_0 (scr: 0.64), source_1 (scr: 0.47), source_2 (scr: 0.45), source_3 (scr: 0.44)
+## Example: multiple context question
+4. **In what club Luka Modrić plays and who is Brian Chesky?**
+    Luka Modrić plays for Real Madrid, a professional football club based in Madrid, Spain. Brian Chesky is the Chief Executive Officer (CEO) of Airbnb, Inc.
+    CONTEXT CONFIDENCE: 100% for Brian Chesky, 0% for Luka Modrić.
+    CONTEXT vs PRIOR:
+    - **Context**: Brian Chesky is identified as the CEO of Airbnb, Inc.
+    - **Prior Knowledge**: Luka Modrić plays for Real Madrid.
+    Sources: source_0 (scr: 0.36), source_1 (scr: 0.32), source_2 (scr: 0.32), source_3 (scr: 0.32)

public/airbnb.svg ADDED Viewed

public/barfin.svg ADDED Viewed

public/fund.svg ADDED Viewed

public/light.svg ADDED Viewed

public/logo_dark.png CHANGED Viewed

public/soccer.svg ADDED Viewed

public/stylesheet.css CHANGED Viewed

@@ -2,6 +2,11 @@ img {
     max-height: 70px !important;
 }
 /* Hide the original text */
 .css-pcmo6i {
     visibility: hidden;
@@ -16,6 +21,10 @@ img {
     display: block;
 }
 img[alt="watermark"] {
     visibility: hidden;
 }

     max-height: 70px !important;
 }
+.css-8mm1u0 {
+    background-color: #288b8f !important;
+    color: white;
+}
 /* Hide the original text */
 .css-pcmo6i {
     visibility: hidden;
     display: block;
 }
+svg[viewBox="0 0 1143 266"] {
+    visibility: hidden;
+}
 img[alt="watermark"] {
     visibility: hidden;
 }

starters.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import chainlit as cl
+@cl.set_starters
+async def set_starters():
+    return [
+        cl.Starter(
+            label="AirBnB info",
+            message="What is Airbnb's 'Description of Business'?",
+            icon="/public/airbnb.svg",
+            ),
+        cl.Starter(
+            label="Cash and cash equivalents",
+            message="What was the total value of 'Cash and cash equivalents' as of December 31, 2023?",
+            icon="/public/barfin.svg",
+            ),
+        cl.Starter(
+            label="shares by Brian Chesky",
+            message="What is the 'maximum number of shares to be sold under the 10b5-1 Trading plan' by Brian Chesky?",
+            icon="/public/light.svg",
+            ),
+        cl.Starter(
+            label="Luka vs. Brian",
+            message="In what club Luka Modrić plays and who is Brian Chesky?",
+            icon="/public/soccer.svg",
+            )
+        ]
+# ...

utils/__pycache__/custom_retriver.cpython-311.pyc CHANGED Viewed

Binary files a/utils/__pycache__/custom_retriver.cpython-311.pyc and b/utils/__pycache__/custom_retriver.cpython-311.pyc differ