Spaces:

jeevanions
/

hf-lm-hosting-rag

Sleeping

App Files Files Community

jeevan committed on Oct 4, 2024

Commit

ef80283

1 Parent(s): f4ae443

locally working

Browse files

Files changed (3) hide show

.chainlit/config.toml +84 -0
app.py +25 -51
chainlit.md +14 -0

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,84 @@

+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+# Enable third-party caching (e.g. the LangChain cache)
+cache = false
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+[features]
+# Show the prompt playground
+prompt_playground = true
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+# Authorize users to upload files with messages
+multi_modal = true
+# Allow users to use speech-to-text
+[features.speech_to_text]
+    enabled = false
+    # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+    # language = "en-US"
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+# Show the readme while the conversation is empty.
+show_readme_as_default = true
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# Large content is collapsed by default for a cleaner UI
+default_collapse_content = true
+# The default value for the "expand messages" setting.
+default_expand_messages = false
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your GitHub repo. This will add a GitHub button in the UI's header.
+# github = ""
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+[meta]
+generated_by = "0.7.700"

app.py CHANGED Viewed

@@ -1,7 +1,3 @@
-### Import Section ###
-"""
-IMPORTS HERE
-"""
 import os
 import uuid
 from dotenv import load_dotenv
@@ -9,32 +5,22 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import PyMuPDFLoader
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import Distance, VectorParams
-from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.storage import LocalFileStore
 from langchain_qdrant import QdrantVectorStore
 from langchain.embeddings import CacheBackedEmbeddings
-from langchain_core.prompts import ChatPromptTemplate
 from chainlit.types import AskFileResponse
-from langchain_core.globals import set_llm_cache
-from langchain_openai import ChatOpenAI
-from langchain_core.caches import InMemoryCache
 from operator import itemgetter
 from langchain_core.runnables.passthrough import RunnablePassthrough
 import chainlit as cl
 from langchain_core.runnables.config import RunnableConfig
-from langchain_community.llms import HuggingFaceEndpoint
 from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
 from langchain_core.prompts import PromptTemplate
-import numpy as np
-from numpy.linalg import norm
 load_dotenv()
-### Global Section ###
-"""
-GLOBAL CODE HERE
-"""
 RAG_PROMPT_TEMPLATE = """\
 <|start_header_id|>system<|end_header_id|>
@@ -49,10 +35,11 @@ Context:
 <|start_header_id|>assistant<|end_header_id|>
 """
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 hf_llm = HuggingFaceEndpoint(
-    endpoint_url=os.environ["YOUR_LLM_ENDPOINT_URL"],
-    max_new_tokens=512,
     top_k=10,
     top_p=0.95,
     typical_p=0.95,
@@ -67,12 +54,6 @@ hf_embeddings = HuggingFaceEndpointEmbeddings(
 )
 rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
-rag_chain = rag_prompt | hf_llm
-def cosine_similarity(phrase_1, phrase_2):
-  vec_1 = hf_embeddings.embed_documents([phrase_1])[0]
-  vec2_2 = hf_embeddings.embed_documents([phrase_2])[0]
-  return np.dot(vec_1, vec2_2) / (norm(vec_1) * norm(vec2_2))
 def process_file(file: AskFileResponse):
     import tempfile
@@ -91,15 +72,11 @@ def process_file(file: AskFileResponse):
     return docs
-### On Chat Start (Session Start) Section ###
 @cl.on_chat_start
 async def on_chat_start():
-    """ SESSION SPECIFIC CODE HERE """
     files = None
     while files == None:
-        # Async method: This allows the function to pause execution while waiting for the user to upload a file,
-        # without blocking the entire application. It improves responsiveness and scalability.
         files = await cl.AskFileMessage(
             content="Please upload a PDF file to begin!",
             accept=["application/pdf"],
@@ -115,38 +92,36 @@ async def on_chat_start():
     await msg.send()
     docs = process_file(file)
-    # Typical QDrant Client Set-up
     collection_name = f"pdf_to_parse_{uuid.uuid4()}"
     client = QdrantClient(":memory:")
     client.create_collection(
         collection_name=collection_name,
-        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
     )
     # Adding cache!
-    store = LocalFileStore("./cache/")
-    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
-        hf_embeddings, store, namespace=hf_embeddings.model
-    )
     # Typical QDrant Vector Store Set-up
     vectorstore = QdrantVectorStore(
         client=client,
         collection_name=collection_name,
-        embedding=cached_embedder)
     for i in range(0, len(docs), 32):
         if i == 0:
-            vectorstore = docs.from_documents(docs[i:i+32], hf_embeddings)
             continue
-        vectorstore.add_documents(docs[i:i+32])
-    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
     retrieval_augmented_qa_chain = (
-        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
-        | RunnablePassthrough.assign(context=itemgetter("context"))
-        | rag_prompt | hf_llm
     )
     # Let the user know that the system is ready
@@ -166,19 +141,18 @@ def rename(orig_author: str):
 ### On Message Section ###
 @cl.on_message
 async def main(message: cl.Message):
-    """
-    MESSAGE CODE HERE
-    """
     runnable = cl.user_session.get("chain")
     msg = cl.Message(content="")
-    # Async method: Using astream allows for asynchronous streaming of the response,
-    # improving responsiveness and user experience by showing partial results as they become available.
     async for chunk in runnable.astream(
-        {"question": message.content},
         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
     ):
-        await msg.stream_token(chunk.content)
-    await msg.send()

 import os
 import uuid
 from dotenv import load_dotenv
 from langchain_community.document_loaders import PyMuPDFLoader
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import Distance, VectorParams
 from langchain.storage import LocalFileStore
 from langchain_qdrant import QdrantVectorStore
 from langchain.embeddings import CacheBackedEmbeddings
 from chainlit.types import AskFileResponse
 from operator import itemgetter
 from langchain_core.runnables.passthrough import RunnablePassthrough
 import chainlit as cl
 from langchain_core.runnables.config import RunnableConfig
+from langchain_huggingface import HuggingFaceEndpoint
 from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
 from langchain_core.prompts import PromptTemplate
 load_dotenv()
+YOUR_LLM_ENDPOINT_URL = os.environ["YOUR_LLM_ENDPOINT_URL"]
+YOUR_EMBED_MODEL_URL = os.environ["YOUR_EMBED_MODEL_URL"]
 RAG_PROMPT_TEMPLATE = """\
 <|start_header_id|>system<|end_header_id|>
 <|start_header_id|>assistant<|end_header_id|>
 """
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)
 hf_llm = HuggingFaceEndpoint(
+    endpoint_url=f"{YOUR_LLM_ENDPOINT_URL}",
+    max_new_tokens=300,
     top_k=10,
     top_p=0.95,
     typical_p=0.95,
 )
 rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
 def process_file(file: AskFileResponse):
     import tempfile
     return docs
 @cl.on_chat_start
 async def on_chat_start():
     files = None
     while files == None:
         files = await cl.AskFileMessage(
             content="Please upload a PDF file to begin!",
             accept=["application/pdf"],
     await msg.send()
     docs = process_file(file)
+    # QDrant Client Set-up
     collection_name = f"pdf_to_parse_{uuid.uuid4()}"
     client = QdrantClient(":memory:")
     client.create_collection(
         collection_name=collection_name,
+        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
     )
     # Adding cache!
+    # store = LocalFileStore("./cache/")
+    # cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+    #     hf_embeddings, store, namespace=hf_embeddings.model
+    # )
     # Typical QDrant Vector Store Set-up
     vectorstore = QdrantVectorStore(
         client=client,
         collection_name=collection_name,
+        embedding=hf_embeddings)
+    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
     for i in range(0, len(docs), 32):
         if i == 0:
+            retriever.add_documents(docs[i:i+32])
             continue
+        retriever.add_documents(docs[i:i+32])
     retrieval_augmented_qa_chain = (
+       {"context": itemgetter("query") | retriever, "query": itemgetter("query")}| rag_prompt | hf_llm
     )
     # Let the user know that the system is ready
 ### On Message Section ###
 @cl.on_message
 async def main(message: cl.Message):
     runnable = cl.user_session.get("chain")
     msg = cl.Message(content="")
     async for chunk in runnable.astream(
+        {"query": message.content},
         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
     ):
+        await msg.stream_token(chunk)
+    await msg.send()
+if __name__ == "__main__":
+    from chainlit.cli import run_chainlit
+    run_chainlit(__file__)

chainlit.md ADDED Viewed

	@@ -0,0 +1,14 @@

+# Welcome to Chainlit! 🚀🤖
+Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
+## Useful Links 🔗
+- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
+- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
+We can't wait to see what you create with Chainlit! Happy coding! 💻😊
+## Welcome screen
+To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.