Spaces:
Running
Running
initial commit
Browse files
app.py
CHANGED
@@ -166,29 +166,8 @@ def grade(file_obj, progress=gr.Progress()):
|
|
166 |
grade_sample_run_complete = False
|
167 |
temperature = 0.0
|
168 |
|
169 |
-
|
170 |
-
# try:
|
171 |
-
response = openai.ChatCompletion.create(
|
172 |
-
# model=gpt_model,
|
173 |
-
engine=gpt_model,
|
174 |
-
max_tokens=3,
|
175 |
-
temperature=temperature,
|
176 |
-
messages=messages)
|
177 |
-
content = response['choices'][0]['message']['content']
|
178 |
-
flag = True
|
179 |
-
try_time = 1
|
180 |
-
while flag:
|
181 |
try:
|
182 |
-
content = content.split(' ')[0].strip()
|
183 |
-
score = float(content)
|
184 |
-
if score > 1.0 or score < 0.0:
|
185 |
-
assert False
|
186 |
-
flag = False
|
187 |
-
except:
|
188 |
-
question = prompt + '\n' + ' | '.join([line['question'], line['answer'].replace("<AND>", " <AND> ").replace("<OR>", " <OR> "), model_pred, ""]) + "\nPredict the correctness of the answer (digit): "
|
189 |
-
messages = [
|
190 |
-
{"role": "user", "content": question},
|
191 |
-
]
|
192 |
response = openai.ChatCompletion.create(
|
193 |
# model=gpt_model,
|
194 |
engine=gpt_model,
|
@@ -196,18 +175,39 @@ def grade(file_obj, progress=gr.Progress()):
|
|
196 |
temperature=temperature,
|
197 |
messages=messages)
|
198 |
content = response['choices'][0]['message']['content']
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
if len(sample_grade['model']) >= j + 1:
|
213 |
sample_grade['model'][j] = response['model']
|
|
|
166 |
grade_sample_run_complete = False
|
167 |
temperature = 0.0
|
168 |
|
169 |
+
while not grade_sample_run_complete:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
response = openai.ChatCompletion.create(
|
172 |
# model=gpt_model,
|
173 |
engine=gpt_model,
|
|
|
175 |
temperature=temperature,
|
176 |
messages=messages)
|
177 |
content = response['choices'][0]['message']['content']
|
178 |
+
flag = True
|
179 |
+
try_time = 1
|
180 |
+
while flag:
|
181 |
+
try:
|
182 |
+
content = content.split(' ')[0].strip()
|
183 |
+
score = float(content)
|
184 |
+
if score > 1.0 or score < 0.0:
|
185 |
+
assert False
|
186 |
+
flag = False
|
187 |
+
except:
|
188 |
+
question = prompt + '\n' + ' | '.join([line['question'], line['answer'].replace("<AND>", " <AND> ").replace("<OR>", " <OR> "), model_pred, ""]) + "\nPredict the correctness of the answer (digit): "
|
189 |
+
messages = [
|
190 |
+
{"role": "user", "content": question},
|
191 |
+
]
|
192 |
+
response = openai.ChatCompletion.create(
|
193 |
+
# model=gpt_model,
|
194 |
+
engine=gpt_model,
|
195 |
+
max_tokens=3,
|
196 |
+
temperature=temperature,
|
197 |
+
messages=messages)
|
198 |
+
content = response['choices'][0]['message']['content']
|
199 |
+
try_time += 1
|
200 |
+
temperature += 0.5
|
201 |
+
print(f"{id} try {try_time} times")
|
202 |
+
print(content)
|
203 |
+
if try_time > 5:
|
204 |
+
score = 0.0
|
205 |
+
flag = False
|
206 |
+
grade_sample_run_complete = True
|
207 |
+
except:
|
208 |
+
# gpt4 may have token rate limit
|
209 |
+
print("sleep 30s")
|
210 |
+
time.sleep(30)
|
211 |
|
212 |
if len(sample_grade['model']) >= j + 1:
|
213 |
sample_grade['model'][j] = response['model']
|