Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,8 @@ import requests
|
|
5 |
import pandas as pd
|
6 |
from langchain_core.messages import HumanMessage
|
7 |
from agent import build_graph
|
|
|
|
|
8 |
|
9 |
# (Keep Constants as is)
|
10 |
# --- Constants ---
|
@@ -183,6 +185,34 @@ if __name__ == "__main__":
|
|
183 |
space_host_startup = os.getenv("SPACE_HOST")
|
184 |
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
if space_host_startup:
|
187 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
188 |
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
|
|
5 |
import pandas as pd
|
6 |
from langchain_core.messages import HumanMessage
|
7 |
from agent import build_graph
|
8 |
+
from langchain_core.documents import Document
|
9 |
+
from langchain_community.vectorstores import Chroma
|
10 |
|
11 |
# (Keep Constants as is)
|
12 |
# --- Constants ---
|
|
|
185 |
space_host_startup = os.getenv("SPACE_HOST")
|
186 |
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
187 |
|
188 |
+
# --- Build the question/answer vector store at startup ---
# Imports are kept local to this startup section. HuggingFaceEmbeddings is
# used below but is not imported in the visible part of the file, which
# would raise NameError when this section runs.
import json
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load metadata.jsonl: one JSON object per line. Blank lines (for example a
# trailing newline at the end of the file) are skipped so json.loads never
# sees an empty string.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_QA = [json.loads(line) for line in jsonl_file if line.strip()]

# 1. Wrap every record in a Document. Each record is expected to provide the
#    'Question', 'Final answer' and 'task_id' keys read below.
documents = [
    Document(
        page_content=f"Q: {q['Question']}\nA: {q['Final answer']}",
        metadata={"source": q['task_id']},
    )
    for q in json_QA
]

# 2. Initialize ChromaDB with sentence-transformer embeddings.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")  # dim=768
vector_store = Chroma.from_documents(
    collection_name='documents',
    documents=documents,
    embedding=embeddings,
    persist_directory="./vector_db",  # Omit for in-memory only
)
|
215 |
+
|
216 |
if space_host_startup:
|
217 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
218 |
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|