Spaces:

Ankitajadhav
/

Moin_Von_Bremen

Sleeping

Ankitajadhav committed on Aug 13, 2024

Commit

67deb0b

verified ·

1 Parent(s): 375f11c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -63,27 +63,23 @@ ids = [str(i) for i in range(len(image_uris))]
 collection_images.add(ids=ids, uris=image_uris)
-# Path to the backup file
-load_path = 'text_collection_backup.json'
-# Load the data from the JSON file
-with open(load_path, 'r') as f:
-    loaded_data = json.load(f)
-# Extract the documents and IDs
-loaded_documents = loaded_data['documents']
-loaded_ids = loaded_data['ids']
-# Assuming 'client' is already set up for ChromaDB
-# Create or get a collection to store the loaded data
-# collection_text = client.create_collection(
-#     name='collection_text',  # Ensure the collection name is consistent if required
-#     embedding_function=default_ef  # Use the same embedding function as before
-# )
-# Add data to the collection
 collection_text.add(
-    documents=loaded_documents,
-    ids=loaded_ids
 )
 # Initialize the transcriber

 collection_images.add(ids=ids, uris=image_uris)
+# adding text collections
+default_ef = embedding_functions.DefaultEmbeddingFunction()
+TEXT_FOLDER = "text"
+text_pth = sorted([os.path.join(TEXT_FOLDER, image_name) for image_name in os.listdir(TEXT_FOLDER) if image_name.endswith('.txt')])
+list_of_text = []
+for text in text_pth:
+    with open(text, 'r') as f:
+        text = f.read()
+        list_of_text.append(text)
+ids_txt_list = ['id'+str(i) for i in range(len(list_of_text))]
 collection_text.add(
+    documents = list_of_text,
+    ids =ids_txt_list
 )
 # Initialize the transcriber