Neda1 committed on
Commit
91877b0
·
verified ·
1 Parent(s): d484a74

Create upload_to_supabase.py

Browse files
Files changed (1) hide show
  1. upload_to_supabase.py +36 -0
upload_to_supabase.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from tqdm import tqdm
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import SupabaseVectorStore
6
+ from langchain.schema.document import Document
7
+ from supabase import create_client, Client
8
+
9
# Upload the rows of a local CSV file to a Supabase vector table.
#
# Flow: read credentials from the environment, build the Supabase client and
# the embedding model, turn every CSV row into a LangChain Document (the
# 'content' column becomes the text, all other columns become metadata), then
# embed and insert the documents via SupabaseVectorStore.from_documents().

# --- Load Environment Variables ---
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Report every missing credential by name before any client is constructed.
_missing_env = [
    name
    for name, value in (("SUPABASE_URL", SUPABASE_URL), ("SUPABASE_KEY", SUPABASE_KEY))
    if not value
]
if _missing_env:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_env)}"
    )

# --- Init Supabase & Embeddings ---
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
# NOTE(review): any other LangChain embeddings class can be swapped in here,
# but the vector column of the target table presumably has to match the
# embedding dimension of the chosen model - confirm against the table schema.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# --- Read CSV File ---
# Required column: 'content'. Every other column is stored as document metadata.
df = pd.read_csv("supabase_docs.csv")
if "content" not in df.columns:
    # Checked once up front; the per-row lookup below would otherwise raise a
    # bare KeyError on the first row.
    raise KeyError("supabase_docs.csv must contain a 'content' column")

# --- Convert rows to LangChain Document objects ---
documents = []
skipped_rows = 0
for _, row in tqdm(df.iterrows(), total=len(df)):
    content = row["content"]
    # An empty CSV cell is read as NaN; str(NaN) would be embedded as the
    # literal text "nan", so rows without usable text are skipped.
    if pd.isna(content) or not str(content).strip():
        skipped_rows += 1
        continue
    # NaN is not a valid JSON value, so missing metadata cells become None.
    metadata = {
        key: (None if pd.isna(value) else value)
        for key, value in row.drop("content").to_dict().items()
    }
    documents.append(Document(page_content=str(content), metadata=metadata))

if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) with empty 'content'.")
if not documents:
    raise ValueError(
        "No rows with non-empty 'content' found in supabase_docs.csv; nothing to upload."
    )

# --- Create Supabase Vector Store and Upload ---
vectorstore = SupabaseVectorStore.from_documents(
    documents=documents,
    embedding=embedding_model,
    client=supabase,
    table_name="documents",
    query_name="match_documents_langchain",
)

print("✅ Upload complete.")