aaromosshf2424 committed on
Commit
58cc93f
·
1 Parent(s): 57b42b0

update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -39,7 +39,7 @@ HF_TOKEN = os.environ["HF_TOKEN"]
39
  """
40
  ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
41
  ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
42
- document_loader = TextLoader("data/paul_graham_essays.txt")
43
  documents = document_loader.load()
44
 
45
  ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
@@ -51,7 +51,7 @@ split_documents = text_splitter.split_documents(documents)
51
  hf_embeddings = HuggingFaceEndpointEmbeddings(
52
  model=HF_EMBED_ENDPOINT,
53
  task="feature-extraction",
54
- huggingfacehub_api_token=os.environ["HF_TOKEN"],
55
  )
56
 
57
  if os.path.exists("./data/vectorstore"):
@@ -65,13 +65,12 @@ if os.path.exists("./data/vectorstore"):
65
  else:
66
  print("Indexing Files")
67
  os.makedirs("./data/vectorstore", exist_ok=True)
68
- ### 4. INDEX FILES
69
- ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
70
  for i in range(0, len(split_documents), 32):
71
  if i == 0:
72
  vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
73
  continue
74
  vectorstore.add_documents(split_documents[i:i+32])
 
75
 
76
  hf_retriever = vectorstore.as_retriever()
77
 
@@ -103,14 +102,14 @@ rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
103
  """
104
  ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
105
  hf_llm = HuggingFaceEndpoint(
106
- endpoint_url=f"{HF_LLM_ENDPOINT}",
107
  max_new_tokens=512,
108
  top_k=10,
109
  top_p=0.95,
110
  typical_p=0.95,
111
  temperature=0.01,
112
  repetition_penalty=1.03,
113
- huggingfacehub_api_token=os.environ["HF_TOKEN"]
114
  )
115
 
116
  @cl.author_rename
@@ -136,7 +135,10 @@ async def start_chat():
136
  """
137
 
138
  ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
139
- lcel_rag_chain = {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}| rag_prompt | hf_llm
 
 
 
140
 
141
  cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
142
 
 
39
  """
40
  ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
41
  ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
42
+ document_loader = TextLoader("./data/paul_graham_essays.txt")
43
  documents = document_loader.load()
44
 
45
  ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
 
51
  hf_embeddings = HuggingFaceEndpointEmbeddings(
52
  model=HF_EMBED_ENDPOINT,
53
  task="feature-extraction",
54
+ huggingfacehub_api_token=HF_TOKEN,
55
  )
56
 
57
  if os.path.exists("./data/vectorstore"):
 
65
  else:
66
  print("Indexing Files")
67
  os.makedirs("./data/vectorstore", exist_ok=True)
 
 
68
  for i in range(0, len(split_documents), 32):
69
  if i == 0:
70
  vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
71
  continue
72
  vectorstore.add_documents(split_documents[i:i+32])
73
+ vectorstore.save_local("./data/vectorstore")
74
 
75
  hf_retriever = vectorstore.as_retriever()
76
 
 
102
  """
103
  ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
104
  hf_llm = HuggingFaceEndpoint(
105
+ endpoint_url=HF_LLM_ENDPOINT,
106
  max_new_tokens=512,
107
  top_k=10,
108
  top_p=0.95,
109
  typical_p=0.95,
110
  temperature=0.01,
111
  repetition_penalty=1.03,
112
+ huggingfacehub_api_token=HF_TOKEN
113
  )
114
 
115
  @cl.author_rename
 
135
  """
136
 
137
  ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
138
+ lcel_rag_chain = (
139
+ {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")}
140
+ | rag_prompt | hf_llm
141
+ )
142
 
143
  cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
144