mustafoyev202 committed on
Commit 91d968a · verified · 1 Parent(s): 2811581

Update README.md

Files changed (1):
  1. README.md +24 -24
README.md CHANGED
@@ -87,52 +87,52 @@ The following hyperparameters were used during training:
  ### Usage
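
The snippet below loads the model with a custom label mapping, tags a sample Uzbek sentence ("The Tesla company is located in the USA."), collapses sub-token predictions back to whole words, and finally feeds the predictions back in as labels to sanity-check the loss.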

```python
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

custom_id2label = {
    0: "O", 1: "B-CARDINAL", 2: "I-CARDINAL", 3: "B-DATE", 4: "I-DATE",
    5: "B-EVENT", 6: "I-EVENT", 7: "B-GPE", 8: "I-GPE", 9: "B-LOC", 10: "I-LOC",
    11: "B-MONEY", 12: "I-MONEY", 13: "B-ORDINAL", 14: "B-ORG", 15: "I-ORG",
    16: "B-PERCENT", 17: "I-PERCENT", 18: "B-PERSON", 19: "I-PERSON",
    20: "B-TIME", 21: "I-TIME"
}
custom_label2id = {v: k for k, v in custom_id2label.items()}

model_name = "mustafoyev202/roberta-uz"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=23)
model.eval()

# Attach the label mapping so downstream tools see readable tags.
model.config.id2label = custom_id2label
model.config.label2id = custom_label2id

text = "Tesla kompaniyasi AQSHda joylashgan."  # "The Tesla company is located in the USA."

# Pre-split the text so word_ids() can map sub-tokens back to words.
tokens = tokenizer(text.split(), return_tensors="pt", is_split_into_words=True)

with torch.no_grad():
    logits = model(**tokens).logits

predicted_token_class_ids = logits.argmax(-1).squeeze().tolist()

# Keep only the prediction for the first sub-token of each word;
# .get guards against label ids outside the 22 mapped classes.
word_ids = tokens.word_ids()
previous_word_id = None
word_predictions = {}

for i, word_id in enumerate(word_ids):
    if word_id is not None:
        label = custom_id2label.get(predicted_token_class_ids[i], "O")
        if word_id != previous_word_id:  # first sub-token of a new word
            word_predictions[word_id] = label
        previous_word_id = word_id

words = text.split()  # splitting on whitespace for simplicity
final_predictions = [(word, word_predictions.get(i, "O")) for i, word in enumerate(words)]

print("Predictions:")
for word, label in final_predictions:
    print(f"{word}: {label}")

# Feed the predictions back in as labels; shape must be (batch_size, seq_len).
labels = torch.tensor([predicted_token_class_ids])
loss = model(**tokens, labels=labels).loss
print("\nLoss:", round(loss.item(), 2))
```
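
For quick experiments, the same checkpoint can also be driven through the `pipeline` API, which performs the sub-token aggregation itself. A minimal sketch, assuming the checkpoint's config carries the label mapping shown above:

```python
from transformers import pipeline

# Token-classification pipeline; aggregation_strategy="simple" merges
# consecutive sub-tokens into word-level entity spans.
ner = pipeline(
    "token-classification",
    model="mustafoyev202/roberta-uz",
    aggregation_strategy="simple",
)

for entity in ner("Tesla kompaniyasi AQSHda joylashgan."):
    print(entity["word"], entity["entity_group"], round(entity["score"], 3))
```

With aggregation enabled, each result carries an `entity_group` (the label without the B-/I- prefix) plus `word`, `score`, `start`, and `end`, so no manual word-id bookkeeping is needed.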