KevSun
/

Engessay_grading_ML

Text Classification

Transformers

PyTorch

roberta

Model card Files Files and versions Community

kevintu committed on May 17, 2024

Commit

e7c7b50

verified ·

1 Parent(s): a7dc23c

Update README.md

Browse files

Files changed (1) hide show

README.md +13 -24

README.md CHANGED Viewed

@@ -17,15 +17,12 @@ To test the model, run the following code or paste your essay into the API inter
 1) Please use the following Python code if you want to get the output values ranging from 1 to 5.
 ```
-#import packages
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
 tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
-# Example new text input
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
@@ -37,12 +34,9 @@ new_text = "The English Language Learner Insight, Proficiency and Skills Evaluat
 #    new_text = file.read()
-# Encode the text using the same tokenizer used during training
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
-# Move the model to the correct device (CPU in this case, or GPU if available)
-model.eval()  # Set the model to evaluation mode
 # Perform the prediction
 with torch.no_grad():
@@ -52,13 +46,11 @@ with torch.no_grad():
 predictions = outputs.logits.squeeze()
-# Assuming the model is a regression model and outputs raw scores
-predicted_scores = predictions.numpy()  # Convert to numpy array if necessary
-trait_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar",  "conventions"]
-# Print the predicted personality traits scores
-for trait, score in zip(trait_names, predicted_scores):
-    print(f"{trait}: {score:.4f}")
 ##"output" (values ranging from 1 to 5):
 #cohesion: 3.5399
@@ -73,37 +65,34 @@ for trait, score in zip(trait_names, predicted_scores):
 2) However, implement the following code if you expect to obtain the output values between 1 and 10.
 ```
-# Import packages
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
-# Load model and tokenizer
 model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
 tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
-# Example new text input
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
-# Encode the text
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
-# Evaluate model
 model.eval()
 with torch.no_grad():
     outputs = model(**encoded_input)
-# Get predictions
 predictions = outputs.logits.squeeze()
-# Convert predictions if necessary
 predicted_scores = predictions.numpy()  # Convert to numpy array
-trait_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
 # Scale predictions from 1 to 10
 scaled_scores = 2.25 * predicted_scores - 1.25
-# Print the scaled personality traits scores
-for trait, score in zip(trait_names, scaled_scores):
     print(f"{trait}: {score:.4f}")
 ##"output" (values between 1-10)

 1) Please use the following Python code if you want to get the output values ranging from 1 to 5.
 ```
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
 tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 #    new_text = file.read()
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
+model.eval()
 # Perform the prediction
 with torch.no_grad():
 predictions = outputs.logits.squeeze()
+predicted_scores = predictions.numpy()
+item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar",  "conventions"]
+for item, score in zip(item_names, predicted_scores):
+    print(f"{item}: {score:.4f}")
 ##"output" (values ranging from 1 to 5):
 #cohesion: 3.5399
 2) However, implement the following code if you expect to obtain the output values between 1 and 10.
 ```
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 model = AutoModelForSequenceClassification.from_pretrained("Kevintu/Engessay_grading_ML")
 tokenizer = AutoTokenizer.from_pretrained("Kevintu/Engessay_grading_ML")
 new_text = "The English Language Learner Insight, Proficiency and Skills Evaluation (ELLIPSE) Corpus is a freely available corpus of ~6,500 ELL writing samples that have been scored for overall holistic language proficiency as well as analytic proficiency scores related to cohesion, syntax, vocabulary, phraseology, grammar, and conventions. In addition, the ELLIPSE corpus provides individual and demographic information for the ELL writers in the corpus including economic status, gender, grade level (8-12), and race/ethnicity. The corpus provides language proficiency scores for individual writers and was developed to advance research in corpus and NLP approaches to assess overall and more fine-grained features of proficiency."
 encoded_input = tokenizer(new_text, return_tensors='pt', padding=True, truncation=True, max_length=64)
 model.eval()
 with torch.no_grad():
     outputs = model(**encoded_input)
 predictions = outputs.logits.squeeze()
 predicted_scores = predictions.numpy()  # Convert to numpy array
+item_names = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
 # Scale predictions from 1 to 10
 scaled_scores = 2.25 * predicted_scores - 1.25
+for item, score in zip(item_names, scaled_scores):
     print(f"{item}: {score:.4f}")
 ##"output" (values between 1-10)