first commit
- .gitattributes +1 -0
- app.py +101 -0
- model.py +16 -0
- requirements.txt +4 -0
- xlmr_base_encoder.pth +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+xlmr_base_encoder.pth filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,101 @@
### 1. Imports and class names setup ###
import gradio as gr
import os
import torch
import torchtext
import torchtext.transforms as T
import torchtext.functional as F
from torch.hub import load_state_dict_from_url
from model import xlmr_base_encoder_model
from timeit import default_timer as timer
from torchdata.datapipes.iter import IterableWrapper
from torch.utils.data import DataLoader

# Setup class names
class_names = ["Bad", "Good"]

### 2. Model and transforms preparation ###
model, transforms = xlmr_base_encoder_model(
    num_classes=2
)

# Load saved weights
model.load_state_dict(
    torch.load(
        f="xlmr_base_encoder.pth",
        map_location=torch.device("cpu")  # load the weights onto the CPU
    )
)

### 3. Predict function ###

def predict(string):

    start_time = timer()

    # Wrap the input string in a (text, dummy_label) pair to match the
    # (text, label) shape the datapipe pipeline expects
    var = (string, -9999999)
    dp = IterableWrapper([var])
    dp = dp.sharding_filter()

    # XLM-R special-token indices and tokenizer assets
    padding_idx = 1
    bos_idx = 0
    eos_idx = 2
    max_seq_len = 256
    xlmr_vocab_path = r"https://download.pytorch.org/models/text/xlmr.vocab.pt"
    xlmr_spm_model_path = r"https://download.pytorch.org/models/text/xlmr.sentencepiece.bpe.model"

    text_transform = T.Sequential(
        T.SentencePieceTokenizer(xlmr_spm_model_path),
        T.VocabTransform(load_state_dict_from_url(xlmr_vocab_path)),
        T.Truncate(max_seq_len - 2),
        T.AddToken(token=bos_idx, begin=True),
        T.AddToken(token=eos_idx, begin=False)
    )

    # Transform the raw dataset using the non-batched API
    # (i.e. apply the transformation line by line)
    def apply_transform(x):
        return text_transform(x[0]), x[1]

    dp = dp.map(apply_transform)
    dp = dp.batch(1)
    dp = dp.rows2columnar(["token_ids", "target"])
    dp = DataLoader(dp, batch_size=None)

    val = next(iter(dp))
    model.to("cpu")
    value = F.to_tensor(val["token_ids"], padding_value=padding_idx).to("cpu")

    # Pass the transformed text through the model and turn the prediction
    # logits into probabilities
    model.eval()
    with torch.inference_mode():
        answer = model(value)
        answer = torch.softmax(answer, dim=1)
    pred_labels_and_probs = {class_names[i]: float(answer[0][i]) for i in range(len(class_names))}

    # Calculate pred time
    end_time = timer()
    pred_time = round(end_time - start_time, 4)

    # Return pred dict and pred time
    return pred_labels_and_probs, pred_time

### 4. Gradio app ###
title = "Good or Bad"
description = "Using XLMR_BASE_ENCODER"

# Create the Gradio demo
demo = gr.Interface(
    fn=predict,  # maps inputs to outputs
    inputs="textbox",
    outputs=[
        gr.Label(num_top_classes=2, label="Predictions"),
        gr.Number(label="Prediction time (s)")
    ],
    title=title,
    description=description,
)

# Launch the demo!
demo.launch()
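For reference, a minimal sketch (not part of the commit) of what the tokenization path inside predict() reduces to. It assumes torchtext's bundled XLMR transform chains the same SentencePiece tokenization, vocab lookup, truncation, and BOS/EOS steps, so the hand-built text_transform above could likely be replaced by the transforms object that xlmr_base_encoder_model() already returns:

import torchtext.functional as F
from torchtext.models import XLMR_BASE_ENCODER

# The bundled transform maps a batch of raw strings to token-id lists
text_transform = XLMR_BASE_ENCODER.transform()
ids = text_transform(["Is this good or bad?"])
# Pad into a (1, seq_len) tensor; 1 is XLM-R's padding index
batch = F.to_tensor(ids, padding_value=1)
print(batch.shape)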
model.py
ADDED
@@ -0,0 +1,16 @@
import torch
import torchtext
from torchtext.models import RobertaClassificationHead, XLMR_BASE_ENCODER
from torch import nn

def xlmr_base_encoder_model(num_classes: int = 2):  # default output classes = 2 (Bad, Good)
    # 1. Create the pretrained XLM-R base encoder transforms and model
    transforms = torchtext.models.XLMR_BASE_ENCODER.transform()
    classifier_head = RobertaClassificationHead(num_classes=num_classes, input_dim=768)
    model = XLMR_BASE_ENCODER.get_model(head=classifier_head)

    # 2. Freeze all layers (this Space only runs inference with saved weights)
    for param in model.parameters():
        param.requires_grad = False

    return model, transforms
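A hypothetical training-side sketch (not in this commit): because the loop above freezes every parameter, fine-tuning would first have to unfreeze the classification head. The model.head attribute is an assumption based on torchtext's RobertaModel storing the head passed to get_model():

import torch
from model import xlmr_base_encoder_model

model, transforms = xlmr_base_encoder_model(num_classes=2)

# Unfreeze only the classification head; the XLM-R encoder stays frozen
for param in model.head.parameters():
    param.requires_grad = True

# Optimize just the trainable (head) parameters
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=1e-4
)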
requirements.txt
ADDED
@@ -0,0 +1,5 @@
torch == 1.13.1
torchvision == 0.14.1
torchtext == 0.14.1
torchdata == 0.5.1
gradio == 3.1.4
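These pins should come from the same PyTorch release train (torch 1.13.1 pairs with torchvision 0.14.1, torchtext 0.14.1, and torchdata 0.5.1); note torchtext was added here because app.py imports it. An illustrative sanity check:

# Mismatched torch/torchtext/torchdata builds usually fail at import time
import torch, torchtext, torchdata, gradio
print(torch.__version__, torchtext.__version__, torchdata.__version__, gradio.__version__)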
xlmr_base_encoder.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4bf2d7a6965a263041383b86e03e44e34ff076c3d08fda84d5b7353e28c09b97
size 1112239509