Spaces:

northstaranlyticsma24
/

aie4-wk8d2

Paused

App Files Files Community

aaromosshf2424 committed on Oct 7, 2024

Commit

58cc93f

1 Parent(s): 57b42b0

update app.py

Browse files

Files changed (1) hide show

app.py +9 -7

app.py CHANGED Viewed

@@ -39,7 +39,7 @@ HF_TOKEN = os.environ["HF_TOKEN"]
 """
 ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
 ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
-document_loader = TextLoader("data/paul_graham_essays.txt")
 documents = document_loader.load()
 ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
@@ -51,7 +51,7 @@ split_documents = text_splitter.split_documents(documents)
 hf_embeddings = HuggingFaceEndpointEmbeddings(
     model=HF_EMBED_ENDPOINT,
     task="feature-extraction",
-    huggingfacehub_api_token=os.environ["HF_TOKEN"],
 )
 if os.path.exists("./data/vectorstore"):
@@ -65,13 +65,12 @@ if os.path.exists("./data/vectorstore"):
 else:
     print("Indexing Files")
     os.makedirs("./data/vectorstore", exist_ok=True)
-    ### 4. INDEX FILES
-    ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
     for i in range(0, len(split_documents), 32):
         if i == 0:
             vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
             continue
         vectorstore.add_documents(split_documents[i:i+32])
 hf_retriever = vectorstore.as_retriever()
@@ -103,14 +102,14 @@ rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
 """
 ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
 hf_llm = HuggingFaceEndpoint(
-    endpoint_url=f"{HF_LLM_ENDPOINT}",
     max_new_tokens=512,
     top_k=10,
     top_p=0.95,
     typical_p=0.95,
     temperature=0.01,
     repetition_penalty=1.03,
-    huggingfacehub_api_token=os.environ["HF_TOKEN"]
 )
 @cl.author_rename
@@ -136,7 +135,10 @@ async def start_chat():
     """
     ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
-    lcel_rag_chain = {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}| rag_prompt | hf_llm
     cl.user_session.set("lcel_rag_chain", lcel_rag_chain)

 """
 ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
 ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
+document_loader = TextLoader("./data/paul_graham_essays.txt")
 documents = document_loader.load()
 ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
 hf_embeddings = HuggingFaceEndpointEmbeddings(
     model=HF_EMBED_ENDPOINT,
     task="feature-extraction",
+    huggingfacehub_api_token=HF_TOKEN,
 )
 if os.path.exists("./data/vectorstore"):
 else:
     print("Indexing Files")
     os.makedirs("./data/vectorstore", exist_ok=True)
     for i in range(0, len(split_documents), 32):
         if i == 0:
             vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
             continue
         vectorstore.add_documents(split_documents[i:i+32])
+    vectorstore.save_local("./data/vectorstore")
 hf_retriever = vectorstore.as_retriever()
 """
 ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
 hf_llm = HuggingFaceEndpoint(
+    endpoint_url=HF_LLM_ENDPOINT,
     max_new_tokens=512,
     top_k=10,
     top_p=0.95,
     typical_p=0.95,
     temperature=0.01,
     repetition_penalty=1.03,
+    huggingfacehub_api_token=HF_TOKEN
 )
 @cl.author_rename
     """
     ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
+    lcel_rag_chain = (
+        {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
+        | rag_prompt | hf_llm
+    )
     cl.user_session.set("lcel_rag_chain", lcel_rag_chain)