rrg92 commited on
Commit
391a4ae
·
verified ·
1 Parent(s): 341f11c

Changed to nomic

Browse files
Files changed (1) hide show
  1. app.py +25 -24
app.py CHANGED
@@ -6,15 +6,16 @@ import spaces
6
  import torch
7
 
8
  # neuralmind/bert-base-portuguese-cased
9
- ModelName = "neuralmind/bert-base-portuguese-cased"
10
- model = AutoModel.from_pretrained(ModelName)
11
- tokenizer = AutoTokenizer.from_pretrained(ModelName, do_lower_case=False)
12
- processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
13
- vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
14
 
15
- # tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
16
- # text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
17
- # text_model.eval()
 
 
 
18
 
19
  def mean_pooling(model_output, attention_mask):
20
  token_embeddings = model_output[0]
@@ -26,25 +27,25 @@ def TxtEmbed(text):
26
 
27
 
28
 
29
- input_ids = tokenizer.encode(text, return_tensors='pt')
30
 
31
- with torch.no_grad():
32
- outs = model(input_ids)
33
- encoded = outs[0][0, 1:-1] # Ignore [CLS] and [SEP] special tokens
34
- return (encoded.tolist())[0];
35
 
36
 
37
- #sentences = [text]
38
- #encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
39
- #
40
- #with torch.no_grad():
41
- # model_output = text_model(**encoded_input)
42
- #
43
- #text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
44
- #text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
45
- #text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
46
- #
47
- # return (text_embeddings.tolist)[0]
48
 
49
 
50
 
 
6
  import torch
7
 
8
  # neuralmind/bert-base-portuguese-cased
9
+ #ModelName = "neuralmind/bert-base-portuguese-cased"
10
+ #model = AutoModel.from_pretrained(ModelName)
11
+ #tokenizer = AutoTokenizer.from_pretrained(ModelName, do_lower_case=False)
 
 
12
 
13
+
14
+ #processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
15
+ #vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
16
+ tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
17
+ text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
18
+ text_model.eval()
19
 
20
  def mean_pooling(model_output, attention_mask):
21
  token_embeddings = model_output[0]
 
27
 
28
 
29
 
30
+ #input_ids = tokenizer.encode(text, return_tensors='pt')
31
 
32
+ #with torch.no_grad():
33
+ # outs = model(input_ids)
34
+ # encoded = outs[0][0, 1:-1] # Ignore [CLS] and [SEP] special tokens
35
+ #return (encoded.tolist())[0];
36
 
37
 
38
+ sentences = [text]
39
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
40
+
41
+ with torch.no_grad():
42
+ model_output = text_model(**encoded_input)
43
+
44
+ text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
45
+ text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
46
+ text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
47
+
48
+ return (text_embeddings.tolist())[0]
49
 
50
 
51