ahmedheakl committed on
Commit
b5a7011
1 Parent(s): cd68dfa

Update README.md

Files changed (1)
  1. README.md +65 -0
README.md CHANGED
@@ -13,6 +13,71 @@ metrics:
  pipeline_tag: text-classification
  ---
+ # How to use
+
+ In this example, we run inference on a sample from our dataset (_ResumeAtlas_). You can increase `max_length` for more accurate predictions; a short sketch after this snippet shows how to classify your own text.
+
+ ```python
+ !pip install datasets
+
+ import numpy as np
+ import torch
+ from transformers import BertForSequenceClassification, BertTokenizer
+ from datasets import load_dataset
+ from sklearn import preprocessing
+
+ # Dataset/model IDs and inference settings
+ dataset_id = 'ahmedheakl/resume-atlas'
+ model_id = 'ahmedheakl/bert-resume-classification'
+ label_column = 'Category'
+ num_labels = 43
+ output_attentions = False
+ output_hidden_states = False
+ do_lower_case = True
+ add_special_tokens = True
+ max_length = 512
+ padding = 'max_length'
+ return_attention_mask = True
+ truncation = True
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+ ds = load_dataset(dataset_id, trust_remote_code=True)
+
+ # The model predicts integer label IDs, so we fit a LabelEncoder on the
+ # training labels to map predicted IDs back to category names.
+ le = preprocessing.LabelEncoder()
+ le.fit(ds['train'][label_column])
+
+ tokenizer = BertTokenizer.from_pretrained(model_id, do_lower_case=do_lower_case)
+ model = BertForSequenceClassification.from_pretrained(
+     model_id,
+     num_labels=num_labels,
+     output_attentions=output_attentions,
+     output_hidden_states=output_hidden_states,
+ )
+
+ model = model.to(device).eval()
+ sent = ds['train'][0]['Text']
+
+ # Tokenize the resume, padding/truncating it to a fixed length
+ encoded_dict = tokenizer.encode_plus(
+     sent,
+     add_special_tokens=add_special_tokens,
+     max_length=max_length,
+     padding=padding,
+     return_attention_mask=return_attention_mask,
+     return_tensors='pt',
+     truncation=truncation,
+ )
+ input_ids = encoded_dict['input_ids'].to(device)
+ attention_mask = encoded_dict['attention_mask'].to(device)
+
+ # Inference only, so no gradients are needed
+ with torch.no_grad():
+     outputs = model(
+         input_ids,
+         token_type_ids=None,
+         attention_mask=attention_mask,
+     )
+
+ label_id = np.argmax(outputs['logits'].cpu().numpy(), axis=1)
+ print(f'Predicted: {le.inverse_transform(label_id)[0]} | Ground: {ds["train"][0][label_column]}')
+ ```
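+
+ To classify your own resume text rather than a dataset sample, you can reuse the objects created above. The snippet below is a minimal sketch under that assumption: it reuses `tokenizer`, `model`, `le`, `max_length`, and `device` from the example, and `my_resume` is a hypothetical placeholder string.
+
+ ```python
+ # Hypothetical input; replace with your own resume text.
+ my_resume = "Senior software engineer with seven years of Python experience..."
+
+ # Reuses tokenizer, model, le, max_length, and device from the example above.
+ enc = tokenizer(
+     my_resume,
+     add_special_tokens=True,
+     max_length=max_length,
+     padding='max_length',
+     truncation=True,
+     return_tensors='pt',
+ ).to(device)
+
+ with torch.no_grad():
+     logits = model(**enc).logits
+
+ print('Predicted category:', le.inverse_transform(logits.argmax(dim=1).cpu().numpy())[0])
+ ```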
+
  # Model Card for Model ID

  **Please see paper & code for more information:**