milyiyo committed on
Commit
0f5a30d
·
1 Parent(s): 5f0ab2a

Replace DatasetDict with Dataset

Browse files
Files changed (1) hide show
  1. functions.py +3 -2
functions.py CHANGED
@@ -5,7 +5,7 @@ import torch
5
  from bs4 import BeautifulSoup
6
  from peft import PeftConfig, PeftModel
7
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, AutoModel
8
- from datasets import DatasetDict
9
 
10
  # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
11
 
@@ -57,7 +57,8 @@ def build_faiss_index(text):
57
  emb_list = []
58
  for item in text_list:
59
  emb_list.append({"embeddings": get_embeddings(item)})
60
- dataset = DatasetDict({'train': emb_list})
 
61
  dataset.add_faiss_index(column="embeddings")
62
  shared['embeddings_dataset'] = dataset
63
  print(['build_faiss_index', 'end'])
 
5
  from bs4 import BeautifulSoup
6
  from peft import PeftConfig, PeftModel
7
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, AutoModel
8
+ from datasets import DatasetDict, Dataset
9
 
10
  # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
11
 
 
57
  emb_list = []
58
  for item in text_list:
59
  emb_list.append({"embeddings": get_embeddings(item)})
60
+ # dataset = DatasetDict({'train': emb_list})
61
+ dataset = Dataset.from_dict(emb_list)
62
  dataset.add_faiss_index(column="embeddings")
63
  shared['embeddings_dataset'] = dataset
64
  print(['build_faiss_index', 'end'])