Namitg02 committed on
Commit
e96bccd
·
verified ·
1 Parent(s): 22e910b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -23,7 +23,7 @@ dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
23
  #dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
24
  #dataset = load_dataset("epfl-llm/guidelines", split='train')
25
  #Returns a list of dictionaries, each representing a row in the dataset.
26
- print(dataset[1])
27
  length = len(dataset)
28
 
29
  #Itemdetails = dataset.items()
@@ -35,9 +35,8 @@ embedding_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
35
 
36
  df = pd.DataFrame(dataset)
37
  print(df)
38
- embeddings = embedding_model.encode(dataset["text"])
39
  print(embeddings)
40
- df['embeddings'] = embeddings
41
  print(df)
42
  dataset = Dataset.from_pandas(df)
43
  print(dataset[1])
 
23
  #dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
24
  #dataset = load_dataset("epfl-llm/guidelines", split='train')
25
  #Returns a list of dictionaries, each representing a row in the dataset.
26
+ #print(dataset[1])
27
  length = len(dataset)
28
 
29
  #Itemdetails = dataset.items()
 
35
 
36
  df = pd.DataFrame(dataset)
37
  print(df)
38
+ df['embeddings'] = df['text'].apply(lambda x: embedding_model.encode(x))
39
  print(embeddings)
 
40
  print(df)
41
  dataset = Dataset.from_pandas(df)
42
  print(dataset[1])