# Spaces: Running
import pandas as pd
from buster.documents_manager import DeepLakeDocumentsManager
def prepare_chunks(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the chunk table ready for ingestion.

    Pre-processing based on the latest file provided:

    * the original ``source`` column is copied into ``url``;
    * ``source`` is then overwritten with the constant ``"towardsai_blog"``;
    * rows with a missing value in any column are dropped.

    The input frame is not modified.

    Args:
        df: Chunk table; must contain a ``source`` column.

    Returns:
        A new DataFrame with the ``url`` column added/overwritten, the
        ``source`` column replaced, and incomplete rows removed. The original
        index labels of the surviving rows are kept.

    Raises:
        KeyError: If ``df`` has no ``source`` column.
    """
    # ``url`` is built from the *original* ``source`` values because
    # ``df["source"]`` is evaluated before ``assign`` replaces the column.
    return df.assign(url=df["source"], source="towardsai_blog").dropna()


def main() -> None:
    """Load the chunk CSV, add it to a DeepLake documents manager, and zip it.

    Reads ``data/output.csv``, pre-processes it with :func:`prepare_chunks`,
    passes the result to ``DeepLakeDocumentsManager.batch_add`` for the store
    at ``deeplake_store``, then prints the path returned by ``to_zip()``.
    """
    vector_store_path = "deeplake_store"
    chunk_file = "data/output.csv"
    # NOTE(review): presumably replaces any existing store at
    # ``vector_store_path`` -- confirm against the buster documentation.
    overwrite = True

    df = prepare_chunks(pd.read_csv(chunk_file))

    dm = DeepLakeDocumentsManager(vector_store_path, overwrite=overwrite)
    dm.batch_add(df)
    zipped_file_path = dm.to_zip()
    print(f"Contents zipped to: {zipped_file_path}")


if __name__ == "__main__":
    main()