Update README.md
Browse files
README.md
CHANGED
@@ -17,15 +17,12 @@ To test the model, run the following code or paste your essay into the API inter
|
|
17 |
1) Please use the following Python code if you want to get the output values ranging from 1 to 5.
|
18 |
|
19 |
```
|
20 |
-
#import packages
|
21 |
-
|
22 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
23 |
import torch
|
|
|
24 |
model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
|
25 |
tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
|
26 |
|
27 |
-
|
28 |
-
# Example new text input
|
29 |
new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
|
30 |
|
31 |
|
@@ -37,12 +34,9 @@ new_text = "The English Language Learner Insight, Proficiency and Skills Evaluat
|
|
37 |
# new_text = file.read()
|
38 |
|
39 |
|
40 |
-
# Encode the text using the same tokenizer used during training
|
41 |
encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
42 |
|
43 |
-
|
44 |
-
# Move the model to the correct device (CPU in this case, or GPU if available)
|
45 |
-
model.eval() # Set the model to evaluation mode
|
46 |
|
47 |
# Perform the prediction
|
48 |
with torch.no_grad():
|
@@ -52,13 +46,11 @@ with torch.no_grad():
|
|
52 |
predictions = outputs.logits.squeeze()
|
53 |
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
trait_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
print(f"{trait}: {score:.4f}")
|
62 |
|
63 |
##"output" (values ranging from 1 to 5):
|
64 |
#cohesion: 3.5399
|
@@ -73,37 +65,34 @@ for trait, score in zip(trait_names, predicted_scores):
|
|
73 |
2) However, use the following code if you want the output values to range between 1 and 10.
|
74 |
|
75 |
```
|
76 |
-
# Import packages
|
77 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
78 |
import torch
|
79 |
|
80 |
-
|
81 |
model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
|
82 |
tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
|
83 |
|
84 |
-
|
85 |
new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
|
86 |
|
87 |
-
# Encode the text
|
88 |
encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
89 |
|
90 |
-
|
91 |
model.eval()
|
92 |
with torch.no_grad():
|
93 |
outputs = model(**encoded_input)
|
94 |
|
95 |
-
|
96 |
predictions = outputs.logits.squeeze()
|
97 |
|
98 |
-
# Convert predictions if necessary
|
99 |
predicted_scores = predictions.numpy() # Convert to numpy array
|
100 |
-
|
101 |
|
102 |
# Scale predictions from 1 to 10
|
103 |
scaled_scores = 2.25 * predicted_scores - 1.25
|
104 |
|
105 |
-
|
106 |
-
for
|
107 |
print(f"{trait}: {score:.4f}")
|
108 |
|
109 |
##"output" (values between 1-10)
|
|
|
17 |
1) Please use the following Python code if you want to get the output values ranging from 1 to 5.
|
18 |
|
19 |
```
|
|
|
|
|
20 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
21 |
import torch
|
22 |
+
|
23 |
model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
|
24 |
tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
|
25 |
|
|
|
|
|
26 |
new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
|
27 |
|
28 |
|
|
|
34 |
# new_text = file.read()
|
35 |
|
36 |
|
|
|
37 |
encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
38 |
|
39 |
+
model.eval()
|
|
|
|
|
40 |
|
41 |
# Perform the prediction
|
42 |
with torch.no_grad():
|
|
|
46 |
predictions = outputs.logits.squeeze()
|
47 |
|
48 |
|
49 |
+
predicted_scores = predictions.numpy()
|
50 |
+
item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
|
|
|
51 |
|
52 |
+
for item, score in zip(item_names, predicted_scores):
|
53 |
+
print(f"{item}: {score:.4f}")
|
|
|
54 |
|
55 |
##"output" (values ranging from 1 to 5):
|
56 |
#cohesion: 3.5399
|
|
|
65 |
2) However, use the following code if you want the output values to range between 1 and 10.
|
66 |
|
67 |
```
|
|
|
68 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
69 |
import torch
|
70 |
|
71 |
+
|
72 |
model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
|
73 |
tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
|
74 |
|
75 |
+
|
76 |
new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
|
77 |
|
|
|
78 |
encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
79 |
|
80 |
+
|
81 |
model.eval()
|
82 |
with torch.no_grad():
|
83 |
outputs = model(**encoded_input)
|
84 |
|
85 |
+
|
86 |
predictions = outputs.logits.squeeze()
|
87 |
|
|
|
88 |
predicted_scores = predictions.numpy() # Convert to numpy array
|
89 |
+
item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
|
90 |
|
91 |
# Scale predictions from 1 to 10
|
92 |
scaled_scores = 2.25 * predicted_scores - 1.25
|
93 |
|
94 |
+
|
95 |
+
for item, score in zip(item_names, scaled_scores):
|
96 |
print(f"{item}: {score:.4f}")
|
97 |
|
98 |
##"output" (values between 1-10)
|