MikeCraBash committed on
Commit
69b4c32
·
verified ·
1 Parent(s): 90feb87

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -132
app.py DELETED
@@ -1,132 +0,0 @@
1
- import os
2
- import chainlit as cl
3
- from dotenv import load_dotenv
4
- from operator import itemgetter
5
- from langchain_huggingface import HuggingFaceEndpoint
6
- from langchain_community.document_loaders import TextLoader
7
- from langchain_text_splitters import RecursiveCharacterTextSplitter
8
- from langchain_community.vectorstores import FAISS
9
- from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
- from langchain_core.prompts import PromptTemplate
11
- from langchain.schema.output_parser import StrOutputParser
12
- from langchain.schema.runnable import RunnablePassthrough
13
- from langchain.schema.runnable.config import RunnableConfig
14
-
15
- # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
16
- # ---- ENV VARIABLES ---- #
17
- """
18
- This function will load our environment file (.env) if it is present.
19
-
20
- NOTE: Make sure that .env is in your .gitignore file - it is by default, but please ensure it remains there.
21
- """
22
- load_dotenv()
23
-
24
- """
25
- We will load our environment variables here.
26
- """
27
- HF_LLM_ENDPOINT = os.environ["HF_LLM_ENDPOINT"]
28
- HF_EMBED_ENDPOINT = os.environ["HF_EMBED_ENDPOINT"]
29
- HF_TOKEN = os.environ["HF_TOKEN"]
30
-
31
- # ---- GLOBAL DECLARATIONS ---- #
32
-
33
- # -- RETRIEVAL -- #
34
- """
35
- 1. Load Documents from Text File
36
- 2. Split Documents into Chunks
37
- 3. Load HuggingFace Embeddings (remember to use the URL we set above)
38
- 4. Index Files if they do not exist, otherwise load the vectorstore
39
- """
40
- ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
41
- ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
42
- text_loader =
43
- documents =
44
-
45
- ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
46
- text_splitter =
47
- split_documents =
48
-
49
- ### 3. LOAD HUGGINGFACE EMBEDDINGS
50
- hf_embeddings =
51
-
52
- if os.path.exists("./data/vectorstore"):
53
- vectorstore = FAISS.load_local(
54
- "./data/vectorstore",
55
- hf_embeddings,
56
- allow_dangerous_deserialization=True # this is necessary to load the vectorstore from disk as it's stored as a `.pkl` file.
57
- )
58
- hf_retriever = vectorstore.as_retriever()
59
- print("Loaded Vectorstore")
60
- else:
61
- print("Indexing Files")
62
- os.makedirs("./data/vectorstore", exist_ok=True)
63
- ### 4. INDEX FILES
64
- ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
65
-
66
- hf_retriever = vectorstore.as_retriever()
67
-
68
- # -- AUGMENTED -- #
69
- """
70
- 1. Define a String Template
71
- 2. Create a Prompt Template from the String Template
72
- """
73
- ### 1. DEFINE STRING TEMPLATE
74
- RAG_PROMPT_TEMPLATE =
75
-
76
- ### 2. CREATE PROMPT TEMPLATE
77
- rag_prompt =
78
-
79
- # -- GENERATION -- #
80
- """
81
- 1. Create a HuggingFaceEndpoint for the LLM
82
- """
83
- ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
84
- hf_llm =
85
-
86
- @cl.author_rename
87
- def rename(original_author: str):
88
- """
89
- This function can be used to rename the 'author' of a message.
90
-
91
- In this case, we're overriding the 'Assistant' author to be 'Paul Graham Essay Bot'.
92
- """
93
- rename_dict = {
94
- "Assistant" : "Paul Graham Essay Bot"
95
- }
96
- return rename_dict.get(original_author, original_author)
97
-
98
- @cl.on_chat_start
99
- async def start_chat():
100
- """
101
- This function will be called at the start of every user session.
102
-
103
- We will build our LCEL RAG chain here, and store it in the user session.
104
-
105
- The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
106
- """
107
-
108
- ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
109
- lcel_rag_chain =
110
-
111
- cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
112
-
113
- @cl.on_message
114
- async def main(message: cl.Message):
115
- """
116
- This function will be called every time a message is received from a session.
117
-
118
- We will use the LCEL RAG chain to generate a response to the user query.
119
-
120
- The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
121
- """
122
- lcel_rag_chain = cl.user_session.get("lcel_rag_chain")
123
-
124
- msg = cl.Message(content="")
125
-
126
- async for chunk in lcel_rag_chain.astream(
127
- {"query": message.content},
128
- config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
129
- ):
130
- await msg.stream_token(chunk)
131
-
132
- await msg.send()