poemsforaphrodite committed on
Commit dfe0427 · verified · 1 Parent(s): 49d979b

Update fix.py

Files changed (1)
  1. fix.py +214 -178
fix.py CHANGED
@@ -1,22 +1,22 @@
  # fix.py

- import os
- import re
  import json
  import logging
- import concurrent.futures
- from typing import Dict, Any, Optional, List
- import queue
  import time
  from datetime import datetime
- import threading
- import functools

- from openai import AzureOpenAI
- from supabase import create_client, Client
- from tqdm import tqdm
  from dotenv import load_dotenv
  from ratelimiter import RateLimiter

  # Set up logging with thread safety and custom formatting
  class CustomFormatter(logging.Formatter):
@@ -81,7 +81,7 @@ SUPABASE_URL = os.getenv("SUPABASE_DB_URL")
  SUPABASE_API_KEY = os.getenv("SUPABASE_API_KEY")
  AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
- AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
  AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2023-05-15")

  # Validate environment variables
@@ -94,8 +94,6 @@ if not AZURE_OPENAI_KEY:
  missing_vars.append("AZURE_OPENAI_KEY")
  if not AZURE_OPENAI_ENDPOINT:
  missing_vars.append("AZURE_OPENAI_ENDPOINT")
- if not AZURE_OPENAI_DEPLOYMENT_NAME:
- missing_vars.append("AZURE_OPENAI_DEPLOYMENT_NAME")

  if missing_vars:
  error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
@@ -166,7 +164,11 @@ def generate_fixed_content(row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
  Returns a dictionary with fixed content or None if generation fails.
  """
  try:
- # Create system message with instructions for JSON output
  system_message = """You are an expert in standardized English test content. You must return your response as a valid JSON object with the following structure:
  {
  "reading_passage": "formatted passage text",
@@ -176,37 +178,35 @@ def generate_fixed_content(row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
  "option_c": "option C text",
  "option_d": "option D text",
  "explanation": "explanation text"
- }
-
- Follow these guidelines when formatting the content:
- 1. Reading Passage:
- - Present in clean, professional format with proper paragraphing
- - Keep academic language and formal tone
- - Preserve all factual information
- - Remove any question numbers or markers
- - Format titles and headers appropriately
-
- 2. Questions:
- - Must be answerable solely from the passage
- - Test key concepts, details, or arguments from the text
- - Follow logical progression through the text
- - No external knowledge required
- - Test different reading skills (main idea, inference, detail)
-
- 3. Answer Options:
- - Must relate directly to the passage
- - Only one option should be correct based on passage
- - Other options should be plausible but clearly incorrect
- - Options should have similar length and structure
-
- 4. Quality:
- - Fix grammar and clarity issues
- - Ensure proper organization
- - Use clear, unambiguous language"""

  # Create user message with the content to fix
  user_message = f"""Please format and fix the following exam content, returning a JSON object with the specified structure:

  Reading Passage:
  {row.get('reading_passage', '')}

@@ -243,28 +243,35 @@ Explanation:

  content = response.choices[0].message.content

  try:
  # Parse JSON response
  fixed_data = json.loads(content)

- # Validate required fields
- required_fields = ['reading_passage', 'question_text', 'option_a', 'option_b', 'option_c', 'option_d', 'explanation']
- missing_fields = [field for field in required_fields if field not in fixed_data or not fixed_data[field]]

- if missing_fields:
- logger.error(f"Missing or empty required fields: {', '.join(missing_fields)}")
- return None
-
- # Validate content length
- short_fields = [field for field in required_fields if len(str(fixed_data.get(field, ''))) < 2]
- if short_fields:
- logger.error(f"Fields with insufficient content: {', '.join(short_fields)}")
- return None
-
  # Copy over unchanged fields
  for key in row:
  if key not in fixed_data and key != 'id':
  fixed_data[key] = row[key]

  return fixed_data

@@ -295,74 +302,59 @@ def clean_text(text: str) -> str:

  def check_row_quality(row: Dict[str, Any]) -> bool:
  """
- Checks if the row has good quality data according to exam standards.
- Returns True if the row is good, False if it needs fixing.
  """
- # Skip if already fixed
- if row.get('is_fixed'):
  return True

- # Required fields must be present and non-empty
- required_fields = [
- 'exam_type', 'content_type', 'exam_section', 'domain', 'subdomain',
- 'topic', 'difficulty_level', 'reading_passage', 'question_text',
- 'option_a', 'option_b', 'option_c', 'option_d', 'correct_answer',
- 'explanation'
  ]

- # Check for missing or empty required fields
- for field in required_fields:
- value = row.get(field, '').strip() if isinstance(row.get(field), str) else row.get(field)
- if not value:
- return False
-
- # Check for valid exam type
- if row['exam_type'] not in EXAM_TYPES:
- return False
-
- # Check for valid difficulty level
- if row['difficulty_level'] not in DIFFICULTY_LEVELS:
- return False
-
- # Check for valid correct answer format
- if not is_valid_correct_answer(row['correct_answer']):
- return False
-
- # Check for common OCR and formatting issues
- text_fields = ['reading_passage', 'question_text', 'option_a', 'option_b', 'option_c', 'option_d', 'explanation']
- for field in text_fields:
- text = row.get(field, '')
- if isinstance(text, str):
- # Check for OCR artifacts
- if any(artifact in text.lower() for artifact in [
- 'arebasedonthe', 'lineno', 'click here', 'seenext', 'seebelow',
- 'answerthefollowing', 'choosethebest', 'selectthe'
- ]):
- return False
-
- # Check for formatting issues
- if text.count('.') > 20: # Too many periods might indicate formatting issues
- return False
- if text.count('\n') > 20: # Too many newlines might indicate formatting issues
- return False
- if len(text.split()) < 2: # Text should have at least 2 words
- return False
-
- # Check minimum length requirements
- if len(row['reading_passage'].split()) < MIN_PASSAGE_WORDS:
- return False
-
  # Check for duplicate options
- options = [row['option_a'], row['option_b'], row['option_c'], row['option_d']]
- if len(set(options)) != len(options):
- return False
-
- # Check for explanation quality
- explanation = row['explanation']
- if len(explanation.split()) < 10: # Explanation should be reasonably detailed
- return False
- if not any(word in explanation.lower() for word in ['because', 'since', 'as', 'therefore', 'thus', 'hence']):
- return False

  return True

@@ -388,77 +380,116 @@ def update_row_in_supabase(row_id: str, fixed_data: Dict[str, Any]) -> bool:
  return False

  def process_row(row: Dict[str, Any], progress_counter: AtomicCounter, total_rows: int, row_number: int) -> Dict[str, Any]:
- """Process a single row with progress tracking."""
- result = {
- 'row_id': row.get('id'),
- 'success': False,
- 'message': '',
- 'changes_made': []
- }
-
  try:
  row_id = row.get('id')
- if not row_id:
- result['message'] = "Row without ID found"
- logger.warning(f"Row {row_number}: {result['message']}")
- return result
-
- # Check initial quality
- initial_quality_issues = []
- if not row.get('reading_passage'):
- initial_quality_issues.append("Missing reading passage")
- if not row.get('question_text'):
- initial_quality_issues.append("Missing question text")
- if not all(row.get(f'option_{opt}') for opt in ['a', 'b', 'c', 'd']):
- initial_quality_issues.append("Missing options")
- if not row.get('correct_answer'):
- initial_quality_issues.append("Missing correct answer")
-
- if initial_quality_issues:
- logger.info(f"Row {row_number}: Quality issues found - {', '.join(initial_quality_issues)}")
-
- if check_row_quality(row):
- success = update_row_in_supabase(row_id, {'is_fixed': True})
- result['success'] = success
- result['message'] = "Already good quality, marked as fixed"
- if success:
- logger.info(f"Row {row_number}: Already good quality. Marked as fixed.")
- else:
- logger.error(f"Row {row_number}: Failed to mark as fixed.")
- progress_counter.increment()
- return result

  # Generate fixed content
  fixed_data = generate_fixed_content(row)
  if not fixed_data:
- result['message'] = "Failed to fix content"
- logger.error(f"Row {row_number}: Failed to generate fixed content.")
- progress_counter.increment()
- return result
-
- # Compare changes
- for field in ['reading_passage', 'question_text', 'option_a', 'option_b', 'option_c', 'option_d', 'explanation']:
- if fixed_data.get(field) != row.get(field):
- result['changes_made'].append(field)
-
- fixed_data['is_fixed'] = True
- success = update_row_in_supabase(row_id, fixed_data)
- result['success'] = success
-
- if success:
- changes = ', '.join(result['changes_made']) if result['changes_made'] else 'No changes needed'
- result['message'] = f"Fixed successfully. Changes in: {changes}"
- logger.info(f"Row {row_number}: Fixed successfully. Modified: {changes}")
  else:
- result['message'] = "Failed to update after fixing"
- logger.error(f"Row {row_number}: Failed to update after fixing.")

  except Exception as e:
- result['message'] = f"Error: {str(e)}"
- logger.error(f"Row {row_number}: Error processing - {str(e)}")
-
- progress_counter.increment()
- return result

  def fetch_all_unfixed_rows(supabase_client: Client, batch_size: int = 1000):
  """
@@ -499,6 +530,7 @@ def main():
  total_rows = 0
  success_count = 0
  failure_count = 0
  changes_by_field = {
  'reading_passage': 0,
  'question_text': 0,
@@ -534,6 +566,9 @@ def main():
  # Update changes counter
  for field in result['changes_made']:
  changes_by_field[field] = changes_by_field.get(field, 0) + 1
  else:
  failure_count += 1
  pbar.update(1)
@@ -547,6 +582,7 @@ def main():
  f"Total questions processed: {total_rows}",
  f"Successful updates: {success_count}",
  f"Failed updates: {failure_count}",
  f"Execution time: {execution_time:.2f} seconds",
  "\nChanges by field:",
  *[f"- {field}: {count}" for field, count in changes_by_field.items() if count > 0],
 
@@ -1,22 +1,22 @@
  # fix.py

+ import concurrent.futures
+ import functools
  import json
  import logging
+ import os
+ import re
+ import threading
  import time
  from datetime import datetime
+ from typing import Any, Dict, Optional

  from dotenv import load_dotenv
+ from openai import AzureOpenAI
  from ratelimiter import RateLimiter
+ from supabase import Client, create_client
+ from tqdm import tqdm
+

  # Set up logging with thread safety and custom formatting
  class CustomFormatter(logging.Formatter):
 
@@ -81,7 +81,7 @@ SUPABASE_URL = os.getenv("SUPABASE_DB_URL")
  SUPABASE_API_KEY = os.getenv("SUPABASE_API_KEY")
  AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+ AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME_FIX", "gpt-4o-mini") # Use specific deployment for fixing
  AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2023-05-15")

  # Validate environment variables
 
@@ -94,8 +94,6 @@ if not AZURE_OPENAI_KEY:
  missing_vars.append("AZURE_OPENAI_KEY")
  if not AZURE_OPENAI_ENDPOINT:
  missing_vars.append("AZURE_OPENAI_ENDPOINT")

  if missing_vars:
  error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
 
@@ -166,7 +164,11 @@ def generate_fixed_content(row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
  Returns a dictionary with fixed content or None if generation fails.
  """
  try:
+ # Determine if this is a math question
+ domain = row.get('domain', '').lower()
+ is_math = any(math_term in domain.lower() for math_term in ['math', 'algebra', 'geometry', 'calculus', 'arithmetic'])
+
+ # Create system message with domain-specific instructions
  system_message = """You are an expert in standardized English test content. You must return your response as a valid JSON object with the following structure:
  {
  "reading_passage": "formatted passage text",
 
@@ -176,37 +178,35 @@ def generate_fixed_content(row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
  "option_c": "option C text",
  "option_d": "option D text",
  "explanation": "explanation text"
+ }"""
+
+ if is_math:
+ system_message += """
+ IMPORTANT: For ALL mathematics questions:
+ - You MUST set reading_passage to an empty string (""). No exceptions.
+ - Move any context or problem setup from the reading passage into the question_text
+ - The question_text should contain all necessary mathematical information
+ - Format: reading_passage must be "", question_text contains everything
+
+ Example math question format:
+ {
+ "reading_passage": "",
+ "question_text": "In the given system of equations, y = -1.5 and y = x^2 + 8x + a, where a is a positive constant. The system has exactly one distinct real solution. What is the value of a?",
+ ...
+ }"""
+ else:
+ system_message += """
+ For reading comprehension questions:
+ - Format the reading_passage professionally with proper paragraphing
+ - Ensure the question is answerable from the passage
+ - Make answer options clear and distinct
+ - Reference the passage in the explanation"""

  # Create user message with the content to fix
  user_message = f"""Please format and fix the following exam content, returning a JSON object with the specified structure:

+ Domain: {domain}
+
  Reading Passage:
  {row.get('reading_passage', '')}

 
@@ -243,28 +243,35 @@ Explanation:

  content = response.choices[0].message.content

+ # Calculate cost (gpt-4o-mini pricing)
+ input_tokens = (len(system_message) + len(user_message)) / 4 # Rough estimate: 4 chars per token
+ output_tokens = len(content) / 4
+ # gpt-4o-mini pricing:
+ # Input: $0.300 per 1M tokens
+ # Output: $1.200 per 1M tokens
+ fix_cost = (input_tokens / 1_000_000 * 0.300) + (output_tokens / 1_000_000 * 1.200)
+ logger.info(f"Estimated cost for fixing this question: ${fix_cost:.6f}")
+
  try:
  # Parse JSON response
  fixed_data = json.loads(content)

+ # For math questions, ensure reading passage is empty
+ if is_math and fixed_data.get('reading_passage', '').strip():
+ # Move reading passage content to question text if needed
+ current_passage = fixed_data.get('reading_passage', '').strip()
+ current_question = fixed_data.get('question_text', '').strip()
+ if current_passage:
+ fixed_data['question_text'] = f"{current_passage} {current_question}"
+ fixed_data['reading_passage'] = ""

  # Copy over unchanged fields
  for key in row:
  if key not in fixed_data and key != 'id':
  fixed_data[key] = row[key]
+
+ # Add the fix cost to the data
+ fixed_data['fix_cost'] = fix_cost

  return fixed_data

 
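Aside: the cost figure logged above is a character-count heuristic (roughly 4 characters per token), not a real tokenizer count. A minimal standalone sketch of the same estimate, using the gpt-4o-mini rates quoted in the diff; the function name and example values are illustrative, not part of the commit:

# Rough per-request cost estimate: assumes ~4 characters per token and the
# gpt-4o-mini rates used above ($0.300 per 1M input tokens, $1.200 per 1M output tokens).
def estimate_fix_cost(system_message: str, user_message: str, completion: str) -> float:
    input_tokens = (len(system_message) + len(user_message)) / 4
    output_tokens = len(completion) / 4
    return (input_tokens / 1_000_000 * 0.300) + (output_tokens / 1_000_000 * 1.200)

# Example: a 2,000-character prompt plus a 1,000-character completion
# comes out to roughly $0.00045 under this approximation.
print(f"${estimate_fix_cost('x' * 1500, 'x' * 500, 'x' * 1000):.6f}")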
@@ -295,74 +302,59 @@ def clean_text(text: str) -> str:

  def check_row_quality(row: Dict[str, Any]) -> bool:
  """
+ Check if a row meets quality standards.
+ Returns True if the row is good quality, False if it needs fixing.
  """
+ # Skip if already marked as fixed
+ if row.get('is_fixed', False):
  return True

+ # Check for image-related questions that should be deleted
+ question_text = row.get('question_text', '').lower()
+ reading_passage = row.get('reading_passage', '').lower()
+
+ # Keywords that indicate image-based questions
+ image_keywords = [
+ 'image', 'picture', 'diagram', 'graph', 'figure', 'photo', 'illustration',
+ 'shown', 'depicted', 'displayed', 'above', 'below', 'following figure',
+ 'look at the', 'in this picture', 'as shown', 'pictured'
  ]
+
+ # Check if question or passage refers to images
+ if any(keyword in question_text for keyword in image_keywords) or \
+ any(keyword in reading_passage for keyword in image_keywords):
+ logger.info(f"Row {row.get('id')}: Marked for deletion - contains image references")
+ return None # Return None to indicate deletion
+
+ # Basic validation for required fields
+ if not row.get('question_text') or not row.get('explanation'):
+ logger.info(f"Row {row.get('id')}: Marked for deletion - missing required fields")
+ return None
+
+ if not all(row.get(f'option_{opt}') for opt in ['a', 'b', 'c', 'd']):
+ logger.info(f"Row {row.get('id')}: Marked for deletion - missing options")
+ return None
+
+ if not is_valid_correct_answer(row.get('correct_answer', '')):
+ logger.info(f"Row {row.get('id')}: Marked for deletion - invalid correct answer")
+ return None

+ # Option quality checks
+ options = [row.get(f'option_{opt}', '').strip() for opt in ['a', 'b', 'c', 'd']]
+ if any(len(opt) < 1 for opt in options):
+ logger.info(f"Row {row.get('id')}: Marked for deletion - empty options")
+ return None
+
  # Check for duplicate options
+ if len(set(options)) != 4:
+ logger.info(f"Row {row.get('id')}: Marked for deletion - duplicate options")
+ return None
+
+ # Basic explanation quality check
+ explanation = row.get('explanation', '')
+ if len(explanation) < 50 or not explanation.strip():
+ logger.info(f"Row {row.get('id')}: Marked for deletion - insufficient explanation")
+ return None

  return True

 
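As context for the next hunk: check_row_quality is consumed as a tri-state value by process_row — None means delete the row, True means it is already good and only needs the is_fixed flag, and any other result falls through to regeneration (the docstring's "needs fixing" case). A small sketch of that dispatch, with a hypothetical helper name:

# Hypothetical dispatcher mirroring how process_row interprets check_row_quality.
def route_row(row: dict) -> str:
    verdict = check_row_quality(row)
    if verdict is None:
        return "delete"       # image-based or unsalvageable rows are removed
    if verdict is True:
        return "mark_fixed"   # already good quality, only set the is_fixed flag
    return "regenerate"       # anything else is sent through generate_fixed_content()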
@@ -388,77 +380,116 @@ def update_row_in_supabase(row_id: str, fixed_data: Dict[str, Any]) -> bool:
  return False

  def process_row(row: Dict[str, Any], progress_counter: AtomicCounter, total_rows: int, row_number: int) -> Dict[str, Any]:
+ """Process a single row and return the result."""
  try:
  row_id = row.get('id')
+
+ # Check quality first
+ quality_check = check_row_quality(row)
+
+ # If quality_check is None, delete the row
+ if quality_check is None:
+ try:
+ supabase.table("exam_contents").delete().eq("id", row_id).execute()
+ logger.info(f"Row {row_id}: Deleted due to quality issues.")
+ return {
+ 'success': True,
+ 'changes_made': ['deleted'],
+ 'row_id': row_id,
+ 'cost': 0.0
+ }
+ except Exception as e:
+ logger.error(f"Row {row_id}: Failed to delete - {str(e)}")
+ return {
+ 'success': False,
+ 'row_id': row_id,
+ 'cost': 0.0
+ }
+
+ # If row passes quality check, no need to fix
+ if quality_check is True:
+ # Update is_fixed flag
+ try:
+ supabase.table("exam_contents").update({"is_fixed": True}).eq("id", row_id).execute()
+ logger.info(f"Row {row_id}: Already good quality. Marked as fixed.")
+ return {
+ 'success': True,
+ 'changes_made': ['marked_fixed'],
+ 'row_id': row_id,
+ 'cost': 0.0
+ }
+ except Exception as e:
+ logger.error(f"Row {row_id}: Failed to update fixed status - {str(e)}")
+ return {
+ 'success': False,
+ 'row_id': row_id,
+ 'cost': 0.0
+ }

  # Generate fixed content
  fixed_data = generate_fixed_content(row)
  if not fixed_data:
+ logger.error(f"Row {row_id}: Failed to generate fixed content.")
+ return {
+ 'success': False,
+ 'row_id': row_id,
+ 'cost': 0.0
+ }
+
+ # Track what fields were modified
+ changes_made = []
+ for field in fixed_data:
+ if field in row and fixed_data[field] != row[field]:
+ changes_made.append(field)
+
+ if changes_made:
+ # Add is_fixed flag
+ fixed_data['is_fixed'] = True
+
+ # Update in database
+ try:
+ supabase.table("exam_contents").update(fixed_data).eq("id", row_id).execute()
+ change_list = ', '.join(changes_made)
+ logger.info(f"Row {row_id}: Fixed successfully. Modified: {change_list}")
+ return {
+ 'success': True,
+ 'changes_made': changes_made,
+ 'row_id': row_id,
+ 'cost': fixed_data.get('fix_cost', 0.0) # Include the fix cost
+ }
+ except Exception as e:
+ logger.error(f"Row {row_id}: Failed to update - {str(e)}")
+ return {
+ 'success': False,
+ 'row_id': row_id,
+ 'cost': 0.0
+ }
  else:
+ # No changes needed, just mark as fixed
+ try:
+ supabase.table("exam_contents").update({"is_fixed": True}).eq("id", row_id).execute()
+ logger.info(f"Row {row_id}: Fixed successfully. Modified: No changes needed")
+ return {
+ 'success': True,
+ 'changes_made': ['marked_fixed'],
+ 'row_id': row_id,
+ 'cost': fixed_data.get('fix_cost', 0.0) # Include the fix cost even if no changes
+ }
+ except Exception as e:
+ logger.error(f"Row {row_id}: Failed to update fixed status - {str(e)}")
+ return {
+ 'success': False,
+ 'row_id': row_id,
+ 'cost': 0.0
+ }

  except Exception as e:
+ logger.error(f"Error processing row {row.get('id', 'unknown')}: {str(e)}")
+ return {
+ 'success': False,
+ 'row_id': row.get('id', 'unknown'),
+ 'cost': 0.0
+ }

  def fetch_all_unfixed_rows(supabase_client: Client, batch_size: int = 1000):
  """
 
@@ -499,6 +530,7 @@ def main():
  total_rows = 0
  success_count = 0
  failure_count = 0
+ total_cost = 0.0
  changes_by_field = {
  'reading_passage': 0,
  'question_text': 0,
 
@@ -534,6 +566,9 @@ def main():
  # Update changes counter
  for field in result['changes_made']:
  changes_by_field[field] = changes_by_field.get(field, 0) + 1
+ # Add cost if available
+ if 'cost' in result:
+ total_cost += result['cost']
  else:
  failure_count += 1
  pbar.update(1)
 
@@ -547,6 +582,7 @@ def main():
  f"Total questions processed: {total_rows}",
  f"Successful updates: {success_count}",
  f"Failed updates: {failure_count}",
+ f"Total cost: ${total_cost:.6f}",
  f"Execution time: {execution_time:.2f} seconds",
  "\nChanges by field:",
  *[f"- {field}: {count}" for field, count in changes_by_field.items() if count > 0],