Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import uuid
|
3 |
+
import pandas as pd
|
4 |
+
from qdrant_client import QdrantClient, models
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
|
7 |
+
# === Step 1: Ensure Qdrant directory exists ===
# `exist_ok=True` makes this idempotent: an existing directory is left as-is,
# and there is no separate existence check that another process could race.
# If the path exists but is NOT a directory, FileExistsError is raised here
# instead of the creation step being silently skipped.
os.makedirs("qdrant_data", exist_ok=True)
|
10 |
+
|
11 |
+
# === Step 2: Load dataset ===
# The path is relative, so the CSV is looked up in the current working
# directory. It must provide a "problem" column (embedded in Step 3).
data = pd.read_csv("math_dataset(2).csv")

# === Step 3: Encode questions ===
# NOTE(review): the intfloat/e5 model card asks for a "query: " or
# "passage: " prefix on every input text; none is added here. Check how the
# search side encodes its queries before changing this so both stay aligned.
embedding_model = SentenceTransformer("intfloat/e5-large")
problem_texts = data["problem"].tolist()
# One embedding per row of `data`, in row order.
vectors = embedding_model.encode(problem_texts, show_progress_bar=True)
|
17 |
+
|
18 |
+
# === Step 4: Initialize local Qdrant client ===
# `path=` points the client at the on-disk folder prepared in Step 1.
client = QdrantClient(path="qdrant_data")

# === Step 5: Create collection (drop any previous copy so it's fresh) ===
collection_name = "math_problems"

# `recreate_collection` is deprecated in qdrant-client. Perform the same
# drop-then-create sequence explicitly so every run starts from an empty
# collection.
# NOTE(review): relies on `delete_collection` being a no-op for a collection
# that does not exist yet (first run) -- confirm for the pinned client version.
client.delete_collection(collection_name=collection_name)
client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        # Embedding width is taken from the encoded matrix (rows x dim), so it
        # always matches whatever model produced `vectors`.
        size=vectors.shape[1],
        distance=models.Distance.COSINE,
    ),
)
|
27 |
+
|
28 |
+
# === Step 6: Prepare payload and upload with UUIDs ===
# One dict per CSV row (column name -> value), in row order, so payload[i]
# travels with vectors[i].
payload = data.to_dict(orient="records")

# A fresh random UUID string per vector; ids therefore differ between runs.
ids = [str(uuid.uuid4()) for _ in vectors]

client.upload_collection(
    collection_name=collection_name,
    vectors=vectors,
    payload=payload,
    ids=ids,
)

print("✅ Qdrant vector store created and populated successfully in `qdrant_data/`.")
|