yqsong committed on
Commit
196f8b3
·
1 Parent(s): d27541e

first commit

Browse files
Files changed (2) hide show
  1. execution_accuracy.py +25 -6
  2. requirements.txt +2 -1
execution_accuracy.py CHANGED
@@ -15,7 +15,7 @@
15
 
16
  import evaluate
17
  import datasets
18
-
19
 
20
  # TODO: Add BibTeX citation
21
  _CITATION = """\
@@ -71,8 +71,9 @@ class ExecutionAccuracy(evaluate.Metric):
71
  inputs_description=_KWARGS_DESCRIPTION,
72
  # This defines the format of each prediction and reference
73
  features=datasets.Features({
74
- 'predictions': datasets.Value('int64'),
75
- 'references': datasets.Value('int64'),
 
76
  }),
77
  # Homepage of the module for documentation
78
  homepage="http://module.homepage",
@@ -86,10 +87,28 @@ class ExecutionAccuracy(evaluate.Metric):
86
  # TODO: Download external resources if needed
87
  pass
88
 
89
- def _compute(self, predictions, references):
90
  """Returns the scores"""
91
  # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  return {
94
- "accuracy": accuracy,
95
  }
 
15
 
16
  import evaluate
17
  import datasets
18
+ from records import Database
19
 
20
  # TODO: Add BibTeX citation
21
  _CITATION = """\
 
71
  inputs_description=_KWARGS_DESCRIPTION,
72
  # This defines the format of each prediction and reference
73
  features=datasets.Features({
74
+ 'predictions': datasets.Value('string'),
75
+ 'references': datasets.Value('string'),
76
+ 'db_urls': datasets.Value('string'),
77
  }),
78
  # Homepage of the module for documentation
79
  homepage="http://module.homepage",
 
87
  # TODO: Download external resources if needed
88
  pass
89
 
90
+ def _compute(self, predictions, references, db_urls):
91
  """Returns the scores"""
92
  # TODO: Compute the different scores of the module
93
+ cnt = 0
94
+ for prediction, reference, db_url in zip(predictions, references, db_urls):
95
+ db = Database(db_url)
96
+ try:
97
+ pred = db.query(predictions).as_dict()
98
+ except Exception as e:
99
+ pred = []
100
+ try:
101
+ ref = db.query(references).as_dict()
102
+ except Exception as e:
103
+ ref = []
104
+ pred = [tuple(x.values()) for x in pred]
105
+ ref = [tuple(x.values()) for x in ref]
106
+ if len(pred) == len(ref):
107
+ pred.sort(key=lambda x: hash(x))
108
+ ref.sort(key=lambda x: hash(x))
109
+ if pred == ref:
110
+ cnt += 1
111
+ accuracy = cnt / len(predictions)
112
  return {
113
+ "execution accuracy": accuracy,
114
  }
requirements.txt CHANGED
@@ -1 +1,2 @@
1
- git+https://github.com/huggingface/evaluate@main
 
 
1
+ git+https://github.com/huggingface/evaluate@main
2
+ records