kevintu commited on
Commit
e7c7b50
·
verified ·
1 Parent(s): a7dc23c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -24
README.md CHANGED
@@ -17,15 +17,12 @@ To test the model, run the following code or paste your essay into the API inter
17
  1) Please use the following Python code if you want to get the output values ranging from 1 to 5.
18
 
19
  ```
20
- #import packages
21
-
22
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
23
  import torch
 
24
  model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
25
  tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
26
 
27
-
28
- # Example new text input
29
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
30
 
31
 
@@ -37,12 +34,9 @@ new_text = "The English Language Learner Insight, Proficiency and Skills Evaluat
37
  # new_text = file.read()
38
 
39
 
40
- # Encode the text using the same tokenizer used during training
41
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
42
 
43
-
44
- # Move the model to the correct device (CPU in this case, or GPU if available)
45
- model.eval() # Set the model to evaluation mode
46
 
47
  # Perform the prediction
48
  with torch.no_grad():
@@ -52,13 +46,11 @@ with torch.no_grad():
52
  predictions = outputs.logits.squeeze()
53
 
54
 
55
- # Assuming the model is a regression model and outputs raw scores
56
- predicted_scores = predictions.numpy() # Convert to numpy array if necessary
57
- trait_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
58
 
59
- # Print the predicted personality traits scores
60
- for trait, score in zip(trait_names, predicted_scores):
61
- print(f"{trait}: {score:.4f}")
62
 
63
  ##"output" (values ranging from 1 to 5):
64
  #cohesion: 3.5399
@@ -73,37 +65,34 @@ for trait, score in zip(trait_names, predicted_scores):
73
  2) However, implement the following code if you expect to obtain the output values between 1 and 10.
74
 
75
  ```
76
- # Import packages
77
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
78
  import torch
79
 
80
- # Load model and tokenizer
81
  model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
82
  tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
83
 
84
- # Example new text input
85
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
86
 
87
- # Encode the text
88
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
89
 
90
- # Evaluate model
91
  model.eval()
92
  with torch.no_grad():
93
  outputs = model(**encoded_input)
94
 
95
- # Get predictions
96
  predictions = outputs.logits.squeeze()
97
 
98
- # Convert predictions if necessary
99
  predicted_scores = predictions.numpy() # Convert to numpy array
100
- trait_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
101
 
102
  # Scale predictions from 1 to 10
103
  scaled_scores = 2.25 * predicted_scores - 1.25
104
 
105
- # Print the scaled personality traits scores
106
- for trait, score in zip(trait_names, scaled_scores):
107
  print(f"{trait}: {score:.4f}")
108
 
109
  ##"output" (values between 1-10)
 
17
  1) Please use the following Python code if you want to get the output values ranging from 1 to 5.
18
 
19
  ```
 
 
20
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
21
  import torch
22
+
23
  model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
24
  tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
25
 
 
 
26
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
27
 
28
 
 
34
  # new_text = file.read()
35
 
36
 
 
37
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
38
 
39
+ model.eval()
 
 
40
 
41
  # Perform the prediction
42
  with torch.no_grad():
 
46
  predictions = outputs.logits.squeeze()
47
 
48
 
49
+ predicted_scores = predictions.numpy()
50
+ item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
 
51
 
52
+ for item, score in zip(item_names, predicted_scores):
53
+ print(f"{item}: {score:.4f}")
 
54
 
55
  ##"output" (values ranging from 1 to 5):
56
  #cohesion: 3.5399
 
65
  2) However, implement the following code if you expect to obtain the output values between 1 and 10.
66
 
67
  ```
 
68
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
69
  import torch
70
 
71
+
72
  model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
73
  tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
74
 
75
+
76
  new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
77
 
 
78
  encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
79
 
80
+
81
  model.eval()
82
  with torch.no_grad():
83
  outputs = model(**encoded_input)
84
 
85
+
86
  predictions = outputs.logits.squeeze()
87
 
 
88
  predicted_scores = predictions.numpy() # Convert to numpy array
89
+ item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
90
 
91
  # Scale predictions from 1 to 10
92
  scaled_scores = 2.25 * predicted_scores - 1.25
93
 
94
+
95
+ for item, score in zip(item_names, scaled_scores):
96
  print(f"{item}: {score:.4f}")
97
 
98
  ##"output" (values between 1-10)