Namitg02 committed on
Commit
49f4c57
·
verified ·
1 Parent(s): 321864f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -5,6 +5,7 @@ from sentence_transformers import SentenceTransformer
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  import faiss
7
  from langchain.prompts import PromptTemplate
 
8
 
9
  import time
10
  import torch
@@ -24,6 +25,15 @@ dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
24
  #Returns a list of dictionaries, each representing a row in the dataset.
25
  print(dataset[1])
26
  length = len(dataset)
 
 
 
 
 
 
 
 
 
27
 
28
  #Itemdetails = dataset.items()
29
  #print(Itemdetails)
@@ -35,12 +45,12 @@ embedding_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
35
  #doc_func = lambda x: x.text
36
  #dataset = list(map(doc_func, dataset))
37
 
38
- def embedder(dataset):
39
- embeddings = embedding_model.encode(dataset["text"])
40
- dataset = dataset.add_column('embeddings', embeddings)
41
- return dataset
42
- updated_dataset = dataset.map(embedder)
43
- dataset['text'][:length]
44
 
45
  #print(embeddings)
46
 
 
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  import faiss
7
  from langchain.prompts import PromptTemplate
8
+ import pandas as pd
9
 
10
  import time
11
  import torch
 
25
  #Returns a list of dictionaries, each representing a row in the dataset.
26
  print(dataset[1])
27
  length = len(dataset)
28
+ df = pd.DataFrame(dataset)
29
+
30
+ embeddings = embedding_model.encode(dataset["text"])
31
+ print(embeddings)
32
+
33
+ df['embeddings'] = embeddings
34
+ dataset = Dataset.from_pandas(df)
35
+ print(dataset[1])
36
+
37
 
38
  #Itemdetails = dataset.items()
39
  #print(Itemdetails)
 
45
  #doc_func = lambda x: x.text
46
  #dataset = list(map(doc_func, dataset))
47
 
48
+ #def embedder(dataset):
49
+ # embeddings = embedding_model.encode(dataset["text"])
50
+ # dataset = dataset.add_column('embeddings', embeddings)
51
+ # return dataset
52
+ #updated_dataset = dataset.map(embedder)
53
+ #dataset['text'][:length]
54
 
55
  #print(embeddings)
56