barunsaha committed on
Commit
dfe9653
1 Parent(s): 7bd7f55

Use BERT mini to generate file name embeddings

Browse files
file_embeddings/embeddings.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c2758d74daff23f638acff398d1a512c74b97178118b7adbe8433ae8d368f52
3
- size 158848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96904aebf90e27e7996692899b658ba8fcc0aa7cc6b67dc1c484d8afd7d33b4c
3
+ size 317568
global_config.py CHANGED
@@ -35,7 +35,7 @@ class GlobalConfig:
35
 
36
  LLM_PROGRESS_MAX = 90
37
  ICONS_DIR = 'icons/png128/'
38
- TINY_BERT_MODEL = 'gaunernst/bert-tiny-uncased'
39
  EMBEDDINGS_FILE_NAME = 'file_embeddings/embeddings.npy'
40
  ICONS_FILE_NAME = 'file_embeddings/icons.npy'
41
 
 
35
 
36
  LLM_PROGRESS_MAX = 90
37
  ICONS_DIR = 'icons/png128/'
38
+ TINY_BERT_MODEL = 'gaunernst/bert-mini-uncased'
39
  EMBEDDINGS_FILE_NAME = 'file_embeddings/embeddings.npy'
40
  ICONS_FILE_NAME = 'file_embeddings/icons.npy'
41
 
helpers/icons_embeddings.py CHANGED
@@ -115,9 +115,48 @@ def main():
115
  # Run this again if icons are to be added/removed
116
  save_icons_embeddings()
117
 
118
- keywords = ['deep learning', 'library', 'universe', 'brain', 'cybersecurity', 'gaming', '']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  icon_files = find_icons(keywords)
120
- print(f'The relevant icon files are: {icon_files}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
  if __name__ == '__main__':
 
115
  # Run this again if icons are to be added/removed
116
  save_icons_embeddings()
117
 
118
+ keywords = [
119
+ 'deep learning',
120
+ '',
121
+ 'recycling',
122
+ 'handshake',
123
+ 'Ferry',
124
+ 'rain drop',
125
+ 'speech bubble',
126
+ 'mental resilience',
127
+ 'turmeric',
128
+ 'Art',
129
+ 'price tag',
130
+ 'Oxygen',
131
+ 'oxygen',
132
+ 'Social Connection',
133
+ 'Accomplishment',
134
+ ]
135
  icon_files = find_icons(keywords)
136
+ print(
137
+ f'The relevant icon files are:\n'
138
+ f'{list(zip(keywords, icon_files))}'
139
+ )
140
+
141
+ # BERT tiny:
142
+ # [('deep learning', 'deep-learning'), ('', '123'), ('recycling', 'refinery'),
143
+ # ('handshake', 'dash-circle'), ('Ferry', 'cart'), ('rain drop', 'bucket'),
144
+ # ('speech bubble', 'globe'), ('mental resilience', 'exclamation-triangle'),
145
+ # ('turmeric', 'kebab'), ('Art', 'display'), ('price tag', 'bug-fill'),
146
+ # ('Oxygen', 'radioactive')]
147
+
148
+ # BERT mini:
149
+ # [('deep learning', 'deep-learning'), ('', 'compass'), ('recycling', 'tools'),
150
+ # ('handshake', 'bandaid'), ('Ferry', 'cart'), ('rain drop', 'trash'),
151
+ # ('speech bubble', 'image'), ('mental resilience', 'recycle'), ('turmeric', 'linkedin'),
152
+ # ('Art', 'book'), ('price tag', 'card-image'), ('Oxygen', 'radioactive')]
153
+
154
+ # BERT small:
155
+ # [('deep learning', 'deep-learning'), ('', 'gem'), ('recycling', 'tools'),
156
+ # ('handshake', 'handbag'), ('Ferry', 'truck'), ('rain drop', 'bucket'),
157
+ # ('speech bubble', 'strategy'), ('mental resilience', 'deep-learning'),
158
+ # ('turmeric', 'flower'),
159
+ # ('Art', 'book'), ('price tag', 'hotdog'), ('Oxygen', 'radioactive')]
160
 
161
 
162
  if __name__ == '__main__':