Login to the Hub first

Files changed:
- background_inference.py +7 -7
- leaderboard_info.md +2 -2
- requirements.txt +1 -0
background_inference.py
CHANGED
@@ -5,6 +5,10 @@ import datasets
 import eval_utils
 from constants import DIALECTS_WITH_LABELS
 from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
+from huggingface_hub import login
+
+access_token = os.environ["HF_TOKEN"]
+login(token=access_token)
 
 model_name = sys.argv[1]
 commit_id = sys.argv[2]
@@ -19,16 +23,12 @@ utils.update_model_queue(
 )
 
 try:
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name, revision=commit_id, access_token=os.environ["HF_TOKEN"]
-    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
     if inference_function == "prompt_chat_LLM":
-        model = AutoModel.from_pretrained(
-            model_name, revision=commit_id, access_token=os.environ["HF_TOKEN"]
-        )
+        model = AutoModel.from_pretrained(model_name, revision=commit_id)
     else:
         model = AutoModelForSequenceClassification.from_pretrained(
-            model_name, revision=commit_id
+            model_name, revision=commit_id
         )
 
     # Load the dataset
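For context, a minimal sketch of the login-then-load pattern this diff switches to (the model name and revision below are placeholders; in the actual script they come from sys.argv). Authenticating once with huggingface_hub.login() lets later from_pretrained() calls reuse the cached credentials, so the per-call access_token arguments can be dropped:

import os

from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Authenticate once with the Hub using the token from the environment;
# subsequent from_pretrained() calls pick up the cached credentials.
login(token=os.environ["HF_TOKEN"])

# Placeholder identifiers for illustration only.
model_name = "some-org/some-model"
commit_id = "main"

tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
model = AutoModelForSequenceClassification.from_pretrained(model_name, revision=commit_id)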
leaderboard_info.md
CHANGED
@@ -1,8 +1,8 @@
 ## Test Set Details
-The test set used for evaluation is composed of 1000 sentences geolocated to the 14 most-populated Arab countries (excluding Somalia from which data was scarce). Each sample is annotated by native speakers recruited from
+The test set used for evaluation is composed of 1000 sentences geolocated to the 14 most-populated Arab countries (excluding Somalia from which data was scarce). Each sample is annotated by native speakers recruited from 11 different Arab countries, namely: Algeria, Egypt, Iraq, Jordan, Morocco, Palestine, Saudi Arabia, Sudan, Syria, Tunisia, Yemen.
 
 ## Evaluation Metrics
-We compute the precision, recall, and F1 scores for each of the
+We compute the precision, recall, and F1 scores for each of the 11 countries (treating each label as a binary classification problem).
 
 ## Data Access
 If you need to access the single-label training sets, and the multi-label development set, please fill the following form: https://forms.gle/t3QTC6ZqyDJBzAau8
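As a rough illustration of the metric described above (not the leaderboard's actual evaluation code, which lives in eval_utils), each country label is scored as an independent binary classification problem, e.g. with scikit-learn; the labels and arrays below are made up:

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# Hypothetical multi-label matrices: one binary column per country label.
countries = ["Algeria", "Egypt", "Iraq"]  # subset, for illustration only
y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])
y_pred = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 0]])

# Score each column as its own binary classification problem.
for i, country in enumerate(countries):
    p, r, f1, _ = precision_recall_fscore_support(
        y_true[:, i], y_pred[:, i], average="binary", zero_division=0
    )
    print(f"{country}: precision={p:.2f} recall={r:.2f} f1={f1:.2f}")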
requirements.txt
CHANGED
@@ -5,3 +5,4 @@ pandas
 numpy
 scikit-learn
 tabulate
+huggingface_hub