Thedatababbler committed on
Commit
a5f5ad5
·
1 Parent(s): 93273ed
Files changed (1) hide show
  1. app.py +41 -40
app.py CHANGED
@@ -7,48 +7,49 @@ tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-u
7
  model = AutoModelForMaskedLM.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
8
 
9
  def mlm(image, text):
10
- questions_dict = {
11
- #'location': f'[CLS] Only [MASK] cells have a {cls_name}. [SEP]', #num of mask?
12
- # 'location': f'[CLS] The {cls_name} normally appears at or near the [MASK] of a cell. [SEP]',
13
- # 'color': f'[CLS] When a cell is histologically stained, the {cls_name} are in [MASK] color. [SEP]',
14
- # 'shape': f'[CLS] Mostly the shape of {cls_name} is [MASK]. [SEP]',
15
- 'location': f'[CLS] The location of {text} is at [MASK]. [SEP]',
16
- 'color': f'[CLS] The typical color of {text} is [MASK]. [SEP]',
17
- 'shape': f'[CLS] The typical shape of {text} is [MASK]. [SEP]',
18
- #'def': f'{cls_name} is a . [SEP]',
19
- }
20
- ans = list()
21
- res = defaultdict()
22
- device = 'cpu'
23
- for k, v in questions_dict.items():
24
- predicted_tokens = []
25
- tokenized_text = tokenizer.tokenize(v)
26
- indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
27
- # Create the segments tensors.
28
- segments_ids = [0] * len(tokenized_text)
29
-
30
- # Convert inputs to PyTorch tensors
31
 
32
- tokens_tensor = torch.tensor([indexed_tokens]).to(device)
33
- segments_tensors = torch.tensor([segments_ids]).to(device)
 
 
34
 
35
- masked_index = tokenized_text.index('[MASK]')
36
- with torch.no_grad():
37
- predictions = model(tokens_tensor, segments_tensors)
38
-
39
- _, predicted_index = torch.topk(predictions[0][0][masked_index], 1)#.item()
40
- predicted_index = predicted_index.detach().cpu().numpy()
41
- #print(predicted_index)
42
- for idx in predicted_index:
43
- predicted_tokens.append(tokenizer.convert_ids_to_tokens([idx])[0])
44
- # for i in range(1):
45
- # res[text][k].append(predicted_tokens)
46
- print(predicted_tokens)
47
- res[k] = predicted_tokens[0]
48
- color, shape, loc = res['color'], res['shape'], res['location']
49
- ans = f'{color} color, {shape} shape, cat at {loc}'
50
- print(ans)
51
- return image, ans
52
 
53
  def to_black(image, text):
54
  output = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
7
  model = AutoModelForMaskedLM.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
8
 
9
def mlm(image, text):
    """Describe a biomedical entity via masked-language-model cloze queries.

    Builds three fill-in-the-blank prompts (location, color, shape) about the
    entity named in ``text``, asks the module-level PubMedBERT ``model`` to
    predict the ``[MASK]`` token in each, and joins the three answers into a
    single description string.

    Parameters
    ----------
    image : passed through unchanged (keeps the image+text app interface).
    text : str
        Name of the cell/structure to describe (e.g. "nucleus").

    Returns
    -------
    tuple
        ``(image, ans)`` where ``ans`` is a string like
        ``"<color> color, <shape> shape, <text> at <location>"``.
    """
    print(text)
    questions_dict = {
        'location': f'[CLS] The location of {text} is at [MASK]. [SEP]',
        'color': f'[CLS] The typical color of {text} is [MASK]. [SEP]',
        'shape': f'[CLS] The typical shape of {text} is [MASK]. [SEP]',
    }
    # BUG FIX: the original used defaultdict() with no default factory, which
    # behaves exactly like a plain dict — use one.
    res = {}
    device = 'cpu'
    for key, prompt in questions_dict.items():
        tokenized_text = tokenizer.tokenize(prompt)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        # Single-sentence input: every segment id is 0, and every real token
        # should be attended to (mask of ones).
        segments_ids = [0] * len(tokenized_text)
        attention_ids = [1] * len(tokenized_text)

        tokens_tensor = torch.tensor([indexed_tokens]).to(device)
        segments_tensors = torch.tensor([segments_ids]).to(device)
        attention_tensor = torch.tensor([attention_ids]).to(device)

        masked_index = tokenized_text.index('[MASK]')
        with torch.no_grad():
            # BUG FIX: the original called model(tokens_tensor, segments_tensors),
            # passing the segment ids as the SECOND POSITIONAL argument — which
            # HF BERT's forward() interprets as attention_mask. An all-zero
            # attention mask hides every token from the model. Pass both
            # tensors by keyword instead.
            predictions = model(
                tokens_tensor,
                attention_mask=attention_tensor,
                token_type_ids=segments_tensors,
            )

        # predictions[0] is the (1, seq_len, vocab) logits tensor; take the
        # top-1 vocab id at the [MASK] position.
        _, predicted_index = torch.topk(predictions[0][0][masked_index], 1)
        predicted_index = predicted_index.detach().cpu().numpy()
        predicted_tokens = [
            tokenizer.convert_ids_to_tokens([idx])[0] for idx in predicted_index
        ]
        print(predicted_tokens)
        res[key] = predicted_tokens[0]

    color, shape, loc = res['color'], res['shape'], res['location']
    ans = f'{color} color, {shape} shape, {text} at {loc}'
    print(ans)
    return image, ans
53
 
54
  def to_black(image, text):
55
  output = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)