Spaces:

ahncs
/

htmllm

Runtime error

App Files Files Community

ahncs committed on Jul 18, 2024

Commit

bc96a72

1 Parent(s): 77fe871

Test

Browse files

Files changed (5) hide show

.chainlit/config.toml +84 -0
Dockerfile +12 -0
app.py +123 -0
chainlit.md +2 -0
requirements.txt +14 -0

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,84 @@

+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+# Enable third-party caching (e.g. LangChain cache)
+cache = false
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+[features]
+# Show the prompt playground
+prompt_playground = true
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+# Authorize users to upload files with messages
+multi_modal = true
+# Allows user to use speech to text
+[features.speech_to_text]
+    enabled = false
+    # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+    # language = "en-US"
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+# Show the readme while the conversation is empty.
+show_readme_as_default = true
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# Large content is collapsed by default for a cleaner UI
+default_collapse_content = true
+# The default value for the expand messages settings.
+default_expand_messages = false
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+[meta]
+generated_by = "0.7.700"

Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+FROM python:3.11.9
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+RUN mkdir -p $HOME/app/data/vectorstore && chown -R user:user $HOME/app/data
+WORKDIR $HOME/app
+# Copy requirements first so the pip layer is cached; COPY does not expand "~", so use $HOME.
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip install -r requirements.txt
+COPY --chown=user . $HOME/app
+CMD ["chainlit", "run", "app.py", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import os
+import openai
+import chainlit as cl
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_openai import OpenAIEmbeddings
+from langchain_openai import ChatOpenAI
+from langchain_community.vectorstores import Qdrant
+from langchain.prompts import ChatPromptTemplate
+from dotenv import load_dotenv
+from operator import itemgetter
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain_huggingface import HuggingFaceEndpointEmbeddings
+from langchain_core.prompts import PromptTemplate
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.runnable.config import RunnableConfig
+#Load environment variables
+load_dotenv()
+OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+#Load 10-K PDF and split into chunks
+loader = PyMuPDFLoader (
+    "./data/AirBNB10kfilingsq12024.pdf"
+)
+documents = loader.load()
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size = 1000,
+    chunk_overlap = 100
+)
+documents = text_splitter.split_documents(documents)
+#Load embeddings model - we'll use OpenAI's text-embedding-3-small
+embeddings = OpenAIEmbeddings(
+    model="text-embedding-3-small"
+)
+#Create QDrant vector store
+qdrant_vector_store = Qdrant.from_documents(
+    documents,
+    embeddings,
+    location=":memory:",
+    collection_name="AirBNB10k",
+)
+#Create Retriever
+retriever = qdrant_vector_store.as_retriever()
+#Create Prompt Template
+template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
+Context:
+{context}
+Question:
+{question}
+"""
+prompt = ChatPromptTemplate.from_template(template)
+#Choose LLM - we'll use gpt-4o.
+primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
+#Set up Chainlit
+@cl.author_rename
+def rename(original_author: str):
+    """
+    This function can be used to rename the 'author' of a message.
+    In this case, we're overriding the 'Assistant' author to be 'Airbnb10kBot'.
+    """
+    rename_dict = {
+        "Assistant" : "Airbnb10kBot"
+    }
+    return rename_dict.get(original_author, original_author)
+@cl.on_chat_start
+async def start_chat():
+    """
+    This function will be called at the start of every user session.
+    We will build our LCEL RAG chain here, and store it in the user session.
+    The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
+    """
+    retrieval_augmented_chain = (
+        # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
+        # "question" : populated by getting the value of the "question" key
+        # "context"  : populated by getting the value of the "question" key and chaining it into the base_retriever
+        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+        | prompt | primary_llm
+    )
+    cl.user_session.set("retrieval_augmented_chain", retrieval_augmented_chain)
+@cl.on_message
+async def main(message: cl.Message):
+    """
+    This function will be called every time a message is recieved from a session.
+    We will use the LCEL RAG chain to generate a response to the user query.
+    The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
+    """
+    retrieval_augmented_chain = cl.user_session.get("retrieval_augmented_chain")
+    msg = cl.Message(content="")
+    async for chunk in retrieval_augmented_chain.astream(
+        {"question": message.content},
+        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
+    ):
+        await msg.stream_token(chunk.content)
+    await msg.send()

chainlit.md ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # HTM LLM - Service Manual - Connex 6000 Welch Allyn Vital Signs Monitor
2	+ # Ask me anything about the service manual!

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+chainlit==0.7.700
+langchain==0.2.5
+langchain_community==0.2.5
+langchain_core==0.2.9
+langchain_huggingface==0.0.3
+langchain_text_splitters==0.2.1
+python-dotenv==1.0.1
+langchain-openai
+langchainhub
+openai
+faiss-cpu
+qdrant-client
+pymupdf
+pandas