manasagangotri committed on
Commit
f1ba123
·
verified ·
1 Parent(s): b40aeb9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Build a local Qdrant vector store from a CSV of math problems.

Running this script:

1. makes sure the ``qdrant_data`` directory exists,
2. loads ``math_dataset(2).csv`` into a DataFrame,
3. embeds the ``problem`` column with the ``intfloat/e5-large`` model,
4. recreates the ``math_problems`` collection in the on-disk Qdrant store,
5. uploads one point per CSV row (vector + full row as payload), each
   identified by a freshly generated UUID.
"""
import os
import uuid

import pandas as pd
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

# === Step 1: Ensure Qdrant directory exists ===
# exist_ok=True replaces the separate exists() check, so there is no window
# between "check" and "create" in which the directory can appear.
os.makedirs("qdrant_data", exist_ok=True)

# === Step 2: Load dataset ===
data = pd.read_csv("math_dataset(2).csv")  # Ensure this CSV is present and formatted correctly

# === Step 3: Encode questions ===
embedding_model = SentenceTransformer("intfloat/e5-large")
# NOTE(review): the E5 model card appears to recommend "query: " / "passage: "
# input prefixes -- confirm what the search side sends before changing this,
# since stored and query vectors must be produced the same way.
vectors = embedding_model.encode(data["problem"].tolist(), show_progress_bar=True)

# === Step 4: Initialize local Qdrant client ===
client = QdrantClient(path="qdrant_data")

# === Step 5: Create collection (recreate ensures it's fresh) ===
collection_name = "math_problems"
# The vector size is taken from the encoder output rather than hard-coded, so
# the collection always matches whatever dimension the model produced.
# NOTE(review): recreate_collection may be deprecated in newer qdrant-client
# releases -- confirm against the installed version.
client.recreate_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=vectors.shape[1],
        distance=models.Distance.COSINE,
    ),
)

# === Step 6: Prepare payload and upload with UUIDs ===
# One payload dict per CSV row, in the same order as `vectors`.
payload = data.to_dict(orient="records")
# A new random UUID per point; ids are therefore not stable across runs.
ids = [str(uuid.uuid4()) for _ in range(len(vectors))]

client.upload_collection(
    collection_name=collection_name,
    vectors=vectors,
    payload=payload,
    ids=ids,
)

print("✅ Qdrant vector store created and populated successfully in `qdrant_data/`.")