ahamedddd committed
Commit d222e8f · 1 Parent(s): 1123674

first commit

Files changed (5)
  1. .gitattributes +1 -0
  2. app.py +101 -0
  3. model.py +16 -0
  4. requirements.txt +5 -0
  5. xlmr_base_encoder.pth +3 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ xlmr_base_encoder.pth filter=lfs diff=lfs merge=lfs -text
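(The added line above is the pattern that `git lfs track "xlmr_base_encoder.pth"` writes to .gitattributes, so the ~1.1 GB checkpoint below is stored through Git LFS rather than in the regular Git history.)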
app.py ADDED
@@ -0,0 +1,101 @@
+ ### 1. Imports and class names setup ###
+ import gradio as gr
+ import os
+ import torch
+ import torchtext
+ import torchtext.transforms as T
+ import torchtext.functional as F
+ from torch.hub import load_state_dict_from_url
+ from torch.utils.data import DataLoader
+ from torchdata.datapipes.iter import IterableWrapper
+ from timeit import default_timer as timer
+ from model import xlmr_base_encoder_model
+
+ # Setup class names
+ class_names = ["Bad", "Good"]
+
+ ### 2. Model and transforms preparation ###
+ model, transforms = xlmr_base_encoder_model(
+     num_classes=2
+ )
+
+ # Load saved weights
+ model.load_state_dict(
+     torch.load(
+         f="xlmr_base_encoder.pth",
+         map_location=torch.device("cpu")  # load the weights onto the CPU
+     )
+ )
+
+ ### 3. Predict function ###
+
+ def predict(string):
+
+     start_time = timer()
+
+     # Wrap the input text in a (text, dummy label) pair so it can flow through the datapipe
+     var = (string, -9999999)
+     dp = IterableWrapper([var])
+     dp = dp.sharding_filter()
+
+     padding_idx = 1
+     bos_idx = 0
+     eos_idx = 2
+     max_seq_len = 256
+     xlmr_vocab_path = r"https://download.pytorch.org/models/text/xlmr.vocab.pt"
+     xlmr_spm_model_path = r"https://download.pytorch.org/models/text/xlmr.sentencepiece.bpe.model"
+
+     text_transform = T.Sequential(
+         T.SentencePieceTokenizer(xlmr_spm_model_path),
+         T.VocabTransform(load_state_dict_from_url(xlmr_vocab_path)),
+         T.Truncate(max_seq_len - 2),
+         T.AddToken(token=bos_idx, begin=True),
+         T.AddToken(token=eos_idx, begin=False)
+     )
+
+     # Transform the raw text using the non-batched API (i.e. apply the transformation line by line)
+     def apply_transform(x):
+         return text_transform(x[0]), x[1]
+
+     dp = dp.map(apply_transform)
+     dp = dp.batch(1)
+     dp = dp.rows2columnar(["token_ids", "target"])
+     dp = DataLoader(dp, batch_size=None)
+
+     val = next(iter(dp))
+     model.to("cpu")
+     value = F.to_tensor(val["token_ids"], padding_value=padding_idx).to("cpu")
+
+     # Pass the transformed text through the model and turn the prediction logits into probabilities
+     model.eval()
+     with torch.inference_mode():
+         answer = model(value)
+     answer = torch.softmax(answer, dim=1)
+     pred_labels_and_probs = {class_names[i]: float(answer[0][i]) for i in range(len(class_names))}
+
+     # Calculate pred time
+     end_time = timer()
+     pred_time = round(end_time - start_time, 4)
+
+     # Return pred dict and pred time
+     return pred_labels_and_probs, pred_time
+
+ ### 4. Gradio app ###
+ title = "Good or Bad"
+ description = "Using XLMR_BASE_ENCODER"
+
+ # Create the Gradio demo
+ demo = gr.Interface(
+     fn=predict,  # maps inputs to outputs
+     inputs="textbox",
+     outputs=[
+         gr.Label(num_top_classes=2, label="Predictions"),
+         gr.Number(label="Prediction time (s)")
+     ],
+     title=title,
+     description=description,
+     # article=article
+ )
+
+ # Launch the demo!
+ demo.launch()
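For reference, gr.Label expects a dict mapping class names to confidences, so for a clearly positive input the pair returned by predict would look roughly like this (illustrative numbers, not actual model output):

    pred_labels_and_probs = {"Bad": 0.0421, "Good": 0.9579}   # confidences sum to 1 after softmax
    pred_time = 0.1234                                        # seconds, rounded to 4 decimals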
model.py ADDED
@@ -0,0 +1,16 @@
+ import torch
+ import torchtext
+ from torchtext.models import RobertaClassificationHead, XLMR_BASE_ENCODER
+ from torch import nn
+ def xlmr_base_encoder_model(num_classes: int = 2):  # default output classes = 2 (Bad, Good)
+     # 1, 2, 3. Create XLM-R base encoder transforms, classification head and model
+     transforms = XLMR_BASE_ENCODER.transform()
+     classifier_head = RobertaClassificationHead(num_classes=num_classes, input_dim=768)
+     model = XLMR_BASE_ENCODER.get_model(head=classifier_head)
+
+     # 4. Freeze all layers (the trained weights are loaded from the checkpoint in app.py)
+     for param in model.parameters():
+         param.requires_grad = False
+
+
+     return model, transforms
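Note that the loop above freezes every parameter, including the new classification head, which is fine here because app.py only runs inference with the saved checkpoint. If the head were to be fine-tuned, a minimal sketch (assuming torchtext's RobertaModel exposes the head as model.head) would be:

    model, transforms = xlmr_base_encoder_model(num_classes=2)
    for param in model.head.parameters():
        param.requires_grad = True   # train only the classification head; the encoder stays frozen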
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch == 1.13.1
+ torchtext == 0.14.1
+ torchvision == 0.14.1
+ torchdata == 0.5.1
+ gradio == 3.1.4
xlmr_base_encoder.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bf2d7a6965a263041383b86e03e44e34ff076c3d08fda84d5b7353e28c09b97
+ size 1112239509