tasmimulhuda committed on
Commit
6a7d999
·
verified ·
1 Parent(s): 4bba01b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -0
app.py CHANGED
@@ -5,6 +5,8 @@ import requests
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
  from agent import build_graph
 
 
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
@@ -183,6 +185,34 @@ if __name__ == "__main__":
183
  space_host_startup = os.getenv("SPACE_HOST")
184
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  if space_host_startup:
187
  print(f"✅ SPACE_HOST found: {space_host_startup}")
188
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
 
5
  import pandas as pd
6
  from langchain_core.messages import HumanMessage
7
  from agent import build_graph
8
+ from langchain_core.documents import Document
9
+ from langchain_community.vectorstores import Chroma
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
 
185
  space_host_startup = os.getenv("SPACE_HOST")
186
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
187
 
188
+ # Load metadata.jsonl
189
+ import json
190
+ # Read every line of metadata.jsonl; each line is parsed as one JSON record below
191
+ with open('metadata.jsonl', 'r') as jsonl_file:
192
+ json_list = list(jsonl_file)
193
+
194
+ json_QA = []
195
+ for json_str in json_list:
196
+ json_data = json.loads(json_str)
197
+ json_QA.append(json_data)
198
+
199
+ # 1. Wrap each record in a Document: question and final answer as content, task_id as source
200
+ documents = [
201
+ Document(
202
+ page_content=f"Q: {q['Question']}\nA: {q['Final answer']}",
203
+ metadata={"source": q['task_id']}
204
+ ) for q in json_QA
205
+ ]
206
+
207
+ # 2. Initialize ChromaDB
208
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") # dim=768
209
+ vector_store = Chroma.from_documents(
210
+ collection_name = 'documents',
211
+ documents=documents,
212
+ embedding=embeddings,
213
+ persist_directory="./vector_db" # Omit for in-memory only
214
+ )
215
+
216
  if space_host_startup:
217
  print(f"✅ SPACE_HOST found: {space_host_startup}")
218
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")