Michaeldavidstein committed on
Commit
a4e00f7
·
verified ·
1 Parent(s): 87ec5ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -12,17 +12,10 @@ model = SentenceTransformer('all-MiniLM-L6-v2')
12
  # Initialize the ChromaDB client
13
  client = chromadb.Client()
14
 
15
- # Function to build the database from CSV
16
  def build_database():
17
  # Read the CSV file
18
  df = pd.read_csv('collection_data.csv')
19
 
20
- # Drop unnecessary columns if they exist
21
- if 'uris' in df.columns:
22
- df.drop('uris', axis=1, inplace=True)
23
- if 'data' in df.columns:
24
- df.drop('data', axis=1, inplace=True)
25
-
26
  # Create a collection
27
  collection_name = 'Dataset-10k-companies'
28
 
@@ -33,12 +26,21 @@ def build_database():
33
  # Create a new collection
34
  collection = client.create_collection(name=collection_name)
35
 
 
 
 
 
 
 
 
 
 
36
  # Add the data from the DataFrame to the collection
37
  collection.add(
38
  documents=df['documents'].tolist(),
39
  ids=df['ids'].tolist(),
40
  metadatas=df['metadatas'].apply(eval).tolist(),
41
- embeddings=df['embeddings'].apply(lambda x: eval(x.replace(',,', ','))).tolist()
42
  )
43
 
44
  return collection
 
12
  # Initialize the ChromaDB client
13
  client = chromadb.Client()
14
 
 
15
  def build_database():
16
  # Read the CSV file
17
  df = pd.read_csv('collection_data.csv')
18
 
 
 
 
 
 
 
19
  # Create a collection
20
  collection_name = 'Dataset-10k-companies'
21
 
 
26
  # Create a new collection
27
  collection = client.create_collection(name=collection_name)
28
 
29
+ # Function to safely process embeddings
30
+ def process_embedding(x):
31
+ if isinstance(x, str):
32
+ return eval(x.replace(',,', ','))
33
+ elif isinstance(x, float):
34
+ return [] # or some default value
35
+ else:
36
+ return x
37
+
38
  # Add the data from the DataFrame to the collection
39
  collection.add(
40
  documents=df['documents'].tolist(),
41
  ids=df['ids'].tolist(),
42
  metadatas=df['metadatas'].apply(eval).tolist(),
43
+ embeddings=df['embeddings'].apply(process_embedding).tolist()
44
  )
45
 
46
  return collection