Login to the Hub first

Files changed:
- background_inference.py +7 -7
- leaderboard_info.md +2 -2
- requirements.txt +1 -0
background_inference.py
CHANGED
@@ -5,6 +5,10 @@ import datasets
 import eval_utils
 from constants import DIALECTS_WITH_LABELS
 from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
+from huggingface_hub import login
+
+access_token = os.environ["HF_TOKEN"]
+login(token=access_token)
 
 model_name = sys.argv[1]
 commit_id = sys.argv[2]
@@ -19,16 +23,12 @@ utils.update_model_queue(
 )
 
 try:
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name, revision=commit_id, access_token=os.environ["HF_TOKEN"]
-    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
     if inference_function == "prompt_chat_LLM":
-        model = AutoModel.from_pretrained(
-            model_name, revision=commit_id, access_token=os.environ["HF_TOKEN"]
-        )
+        model = AutoModel.from_pretrained(model_name, revision=commit_id)
     else:
         model = AutoModelForSequenceClassification.from_pretrained(
-            model_name, revision=commit_id
+            model_name, revision=commit_id
         )
 
     # Load the dataset
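For context, a minimal sketch of the login-then-load pattern this diff switches to (the model name and revision below are placeholders; in the actual script they come from sys.argv). Authenticating once with huggingface_hub.login() lets later from_pretrained() calls reuse the cached credentials, so the per-call access_token arguments can be dropped:

import os

from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Authenticate once with the Hub using the token from the environment;
# subsequent from_pretrained() calls pick up the cached credentials.
login(token=os.environ["HF_TOKEN"])

# Placeholder identifiers for illustration only.
model_name = "some-org/some-model"
commit_id = "main"

tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
model = AutoModelForSequenceClassification.from_pretrained(model_name, revision=commit_id)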
leaderboard_info.md
CHANGED
@@ -1,8 +1,8 @@
 ## Test Set Details
-The test set used for evaluation is composed of 1000 sentences geolocated to the 14 most-populated Arab countries (excluding Somalia from which data was scarce). Each sample is annotated by native speakers recruited from
+The test set used for evaluation is composed of 1000 sentences geolocated to the 14 most-populated Arab countries (excluding Somalia from which data was scarce). Each sample is annotated by native speakers recruited from 11 different Arab countries, namely: Algeria, Egypt, Iraq, Jordan, Morocco, Palestine, Saudi Arabia, Sudan, Syria, Tunisia, Yemen.
 
 ## Evaluation Metrics
-We compute the precision, recall, and F1 scores for each of the
+We compute the precision, recall, and F1 scores for each of the 11 countries (treating each label as a binary classification problem).
 
 ## Data Access
 If you need to access the single-label training sets, and the multi-label development set, please fill the following form: https://forms.gle/t3QTC6ZqyDJBzAau8
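As a rough illustration of the metric described above (not the leaderboard's actual evaluation code, which lives in eval_utils), each country label is scored as an independent binary classification problem, e.g. with scikit-learn; the labels and arrays below are made up:

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# Hypothetical multi-label matrices: one binary column per country label.
countries = ["Algeria", "Egypt", "Iraq"]  # subset, for illustration only
y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])
y_pred = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 0]])

# Score each column as its own binary classification problem.
for i, country in enumerate(countries):
    p, r, f1, _ = precision_recall_fscore_support(
        y_true[:, i], y_pred[:, i], average="binary", zero_division=0
    )
    print(f"{country}: precision={p:.2f} recall={r:.2f} f1={f1:.2f}")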
requirements.txt
CHANGED
@@ -5,3 +5,4 @@ pandas
 numpy
 scikit-learn
 tabulate
+huggingface_hub