Ankitajadhav committed on
Commit
67deb0b
·
verified ·
1 Parent(s): 375f11c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -20
app.py CHANGED
@@ -63,27 +63,23 @@ ids = [str(i) for i in range(len(image_uris))]
63
 
64
  collection_images.add(ids=ids, uris=image_uris)
65
 
66
- # Path to the backup file
67
- load_path = 'text_collection_backup.json'
68
-
69
- # Load the data from the JSON file
70
- with open(load_path, 'r') as f:
71
- loaded_data = json.load(f)
72
-
73
- # Extract the documents and IDs
74
- loaded_documents = loaded_data['documents']
75
- loaded_ids = loaded_data['ids']
76
-
77
- # Assuming 'client' is already set up for ChromaDB
78
- # Create or get a collection to store the loaded data
79
- # collection_text = client.create_collection(
80
- # name='collection_text', # Ensure the collection name is consistent if required
81
- # embedding_function=default_ef # Use the same embedding function as before
82
- # )
83
- # Add data to the collection
84
  collection_text.add(
85
- documents=loaded_documents,
86
- ids=loaded_ids
87
  )
88
 
89
  # Initialize the transcriber
 
63
 
64
  collection_images.add(ids=ids, uris=image_uris)
65
 
66
+ # adding text collections
67
+
68
+ default_ef = embedding_functions.DefaultEmbeddingFunction()
69
+ TEXT_FOLDER = "text"
70
+ text_pth = sorted([os.path.join(TEXT_FOLDER, image_name) for image_name in os.listdir(TEXT_FOLDER) if image_name.endswith('.txt')])
71
+
72
+ list_of_text = []
73
+ for text in text_pth:
74
+ with open(text, 'r') as f:
75
+ text = f.read()
76
+ list_of_text.append(text)
77
+
78
+ ids_txt_list = ['id'+str(i) for i in range(len(list_of_text))]
79
+
 
 
 
 
80
  collection_text.add(
81
+ documents = list_of_text,
82
+ ids =ids_txt_list
83
  )
84
 
85
  # Initialize the transcriber