"""Embed the rows of ``supabase_docs.csv`` and upload them to a Supabase vector store.

Each CSV row becomes one LangChain ``Document``: the ``content`` column is the
text that gets embedded, and every other column is stored as metadata.

Requires the ``SUPABASE_URL`` and ``SUPABASE_KEY`` environment variables.
"""

import os

import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema.document import Document
from langchain.vectorstores import SupabaseVectorStore
from supabase import Client, create_client
from tqdm import tqdm

# --- Load Environment Variables ---
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Fail early with a readable message instead of handing None to create_client.
_missing_env = [
    name
    for name, value in (("SUPABASE_URL", SUPABASE_URL), ("SUPABASE_KEY", SUPABASE_KEY))
    if not value
]
if _missing_env:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_env)}"
    )

# --- Init Supabase & Embeddings ---
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
# Any other LangChain embeddings class can be substituted here.
# NOTE(review): the embedding size must match the vector column of the
# "documents" table -- confirm against the table schema before switching models.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# --- Read CSV File ---
# Expected columns: 'content' (required) plus any number of metadata columns.
df = pd.read_csv("supabase_docs.csv")

# Validate once up front; checking per row after indexing row["content"]
# could never take effect because the lookup itself would already have raised.
if "content" not in df.columns:
    raise ValueError(
        "supabase_docs.csv must contain a 'content' column; "
        f"found columns: {list(df.columns)}"
    )

# Empty cells are read as NaN, and str(NaN) would embed the literal text "nan".
_row_count = len(df)
df = df.dropna(subset=["content"])
if len(df) < _row_count:
    print(f"Skipping {_row_count - len(df)} row(s) with empty content.")


def _row_metadata(row: pd.Series) -> dict:
    """Return every non-content column of *row* as a metadata dict.

    Missing values (NaN/NaT) are replaced with ``None`` because NaN has no
    JSON representation and the metadata is sent to the database as JSON.
    """
    return {
        key: None if pd.isna(value) else value
        for key, value in row.drop("content").to_dict().items()
    }


# --- Convert rows to LangChain Document objects ---
documents = [
    Document(page_content=str(row["content"]), metadata=_row_metadata(row))
    for _, row in tqdm(df.iterrows(), total=len(df))
]

# --- Create Supabase Vector Store and Upload ---
# from_documents embeds every document and inserts the results into the table.
vectorstore = SupabaseVectorStore.from_documents(
    documents=documents,
    embedding=embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents_langchain",
)

print("✅ Upload complete.")