Tom Aarsen committed
Commit 10a34f1 · 1 Parent(s): 00b4c3f

embeddings models -> embedding models

Files changed (1)
1. README.md +9 -9
README.md CHANGED
@@ -59,14 +59,14 @@ from transformers import AutoTokenizer, AutoModel
 import torch
 import torch.nn.functional as F
 
-#Mean Pooling - Take average of all tokens
+# Mean Pooling - Take average of all tokens
 def mean_pooling(model_output, attention_mask):
-    token_embeddings = model_output.last_hidden_state #First element of model_output contains all token embeddings
+    token_embeddings = model_output.last_hidden_state # First element of model_output contains all token embeddings
     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
 
 
-#Encode text
+# Encode text
 def encode(texts):
     # Tokenize sentences
     encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
@@ -92,27 +92,27 @@ docs = ["Around 9 Million people live in London", "London is known for its finan
 tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/multi-qa-mpnet-base-cos-v1")
 model = AutoModel.from_pretrained("sentence-transformers/multi-qa-mpnet-base-cos-v1")
 
-#Encode query and docs
+# Encode query and docs
 query_emb = encode(query)
 doc_emb = encode(docs)
 
-#Compute dot score between query and all document embeddings
+# Compute dot score between query and all document embeddings
 scores = torch.mm(query_emb, doc_emb.transpose(0, 1))[0].cpu().tolist()
 
-#Combine docs & scores
+# Combine docs & scores
 doc_score_pairs = list(zip(docs, scores))
 
-#Sort by decreasing score
+# Sort by decreasing score
 doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
 
-#Output passages & scores
+# Output passages & scores
 for doc, score in doc_score_pairs:
     print(score, doc)
 ```
 
 ## Usage (Text Embeddings Inference (TEI))
 
-[Text Embeddings Inference (TEI)](https://github.com/huggingface/text-embeddings-inference) is a blazing fast inference solution for text embeddings models.
+[Text Embeddings Inference (TEI)](https://github.com/huggingface/text-embeddings-inference) is a blazing fast inference solution for text embedding models.
 
 - CPU:
 ```bash
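
As context for the TEI section this commit touches: once a TEI server is running with this model (per the README's docker commands, truncated above), embeddings can be requested over plain HTTP. A minimal sketch, assuming the server listens on localhost:8080 (the host and port are assumptions; `/embed` is TEI's documented embedding route):

```python
# Minimal sketch: request embeddings from a running TEI server.
# Assumption: TEI is serving sentence-transformers/multi-qa-mpnet-base-cos-v1
# on localhost:8080; /embed is TEI's standard embedding endpoint.
import requests

response = requests.post(
    "http://localhost:8080/embed",
    json={"inputs": ["How many people live in London?"]},
)
response.raise_for_status()
embeddings = response.json()  # one embedding vector per input string
print(len(embeddings), len(embeddings[0]))  # 1 x 768 for this mpnet-base model
```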