Alvaro committed on
Commit
38c6a34
·
1 Parent(s): e012a04

Refactor config imports and add JSON report for predictions

Browse files

Moved config.py from src/scrape/ up to the src package root and updated all imports to use the new location. Deleted the unused analyze.py script. Enhanced the prediction pipeline to save detailed model results to a JSON file, grouping predictions by event. Updated predict.main to generate a detailed report by default. Improved module import consistency in scrape modules.

src/analysis/elo.py CHANGED
@@ -1,9 +1,8 @@
1
  import csv
2
  import os
3
  from datetime import datetime
4
- import sys
5
 
6
- from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
7
 
8
  # --- ELO Configuration ---
9
  INITIAL_ELO = 1500
 
1
  import csv
2
  import os
3
  from datetime import datetime
 
4
 
5
+ from ..config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
6
 
7
  # --- ELO Configuration ---
8
  INITIAL_ELO = 1500
src/analyze.py DELETED
@@ -1,15 +0,0 @@
1
- import pandas as pd
2
-
3
- ufc_fights = pd.read_csv('output/ufc_fights.csv')
4
- ufc_fighters = pd.read_csv('output/ufc_fighters.csv')
5
-
6
- print(f"Number of fighters registered in UFC: {len(ufc_fighters)}")
7
- unique_fighters=set()
8
- for fight in ufc_fights['fighter_1']:
9
- unique_fighters.add(fight)
10
- for fight in ufc_fights['fighter_2']:
11
- unique_fighters.add(fight)
12
- print(f"Number of fighters who have at least one fight: {len(unique_fighters)}")
13
-
14
- highest_elo_fighters=ufc_fighters.sort_values(by='elo', ascending=False).head(20)
15
- print(highest_elo_fighters)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/{scrape/config.py → config.py} RENAMED
@@ -1,9 +1,11 @@
1
  import os
2
 
3
  OUTPUT_DIR = 'output'
 
 
 
4
 
 
5
  EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
6
  FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
7
 
8
- FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
9
- FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
 
1
  import os
2
 
3
  OUTPUT_DIR = 'output'
4
+ FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
5
+ FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
6
+ MODEL_RESULTS_PATH = os.path.join(OUTPUT_DIR, 'model_results.json')
7
 
8
+ # JSON files (temporary)
9
  EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
10
  FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
11
 
 
 
src/main.py CHANGED
@@ -2,3 +2,4 @@
2
 
3
  # Run scrape.main
4
  # Run analysis.elo to add elo
 
 
2
 
3
  # Run scrape.main
4
  # Run analysis.elo to add elo
5
+ # Run predict.main for ML models
src/predict/main.py CHANGED
@@ -24,7 +24,7 @@ def main():
24
  pipeline = PredictionPipeline(models=models_to_run)
25
 
26
  # Set detailed_report=False for a summary, or True for a full detailed report
27
- pipeline.run(detailed_report=False)
28
 
29
  if __name__ == '__main__':
30
  main()
 
24
  pipeline = PredictionPipeline(models=models_to_run)
25
 
26
  # Set detailed_report=False for a summary, or True for a full detailed report
27
+ pipeline.run(detailed_report=True)
28
 
29
  if __name__ == '__main__':
30
  main()
src/predict/pipeline.py CHANGED
@@ -3,7 +3,9 @@ import os
3
  import sys
4
  from datetime import datetime
5
  from collections import OrderedDict
6
- from ..scrape.config import FIGHTS_CSV_PATH
 
 
7
  from .models import BaseModel
8
 
9
  class PredictionPipeline:
@@ -61,6 +63,7 @@ class PredictionPipeline:
61
  for fight in eval_fights:
62
  f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
63
  actual_winner = fight['winner']
 
64
  predicted_winner = model.predict(f1_name, f2_name)
65
 
66
  is_correct = (predicted_winner == actual_winner)
@@ -69,6 +72,7 @@ class PredictionPipeline:
69
 
70
  predictions.append({
71
  'fight': f"{f1_name} vs. {f2_name}",
 
72
  'predicted_winner': predicted_winner,
73
  'actual_winner': actual_winner,
74
  'is_correct': is_correct
@@ -95,17 +99,38 @@ class PredictionPipeline:
95
  print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
96
  print("-" * 65)
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def _report_detailed_results(self):
99
- """Prints a summary and detailed report of the model evaluations."""
100
  print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
101
- for model_name, result in self.results.items():
102
- print(f"\n--- Model: {model_name} ---")
103
- print(f" Overall Accuracy: {result['accuracy']:.2f}%")
104
- print(" Detailed Predictions:")
105
- for p in result['predictions']:
106
- status = "CORRECT" if p['is_correct'] else "INCORRECT"
107
- print(f" - Fight: {p['fight']}")
108
- print(f" -> Predicted: {p['predicted_winner']}")
109
- print(f" -> Actual: {p['actual_winner']}")
110
- print(f" -> Result: {status}")
111
- print("------------------------" + "-" * len(model_name))
 
3
  import sys
4
  from datetime import datetime
5
  from collections import OrderedDict
6
+ import json
7
+
8
+ from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH
9
  from .models import BaseModel
10
 
11
  class PredictionPipeline:
 
63
  for fight in eval_fights:
64
  f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
65
  actual_winner = fight['winner']
66
+ event_name = fight.get('event_name', 'Unknown Event')
67
  predicted_winner = model.predict(f1_name, f2_name)
68
 
69
  is_correct = (predicted_winner == actual_winner)
 
72
 
73
  predictions.append({
74
  'fight': f"{f1_name} vs. {f2_name}",
75
+ 'event': event_name,
76
  'predicted_winner': predicted_winner,
77
  'actual_winner': actual_winner,
78
  'is_correct': is_correct
 
99
  print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
100
  print("-" * 65)
101
 
102
+ def _save_report_to_json(self, file_path=MODEL_RESULTS_PATH):
103
+ """Saves the detailed prediction results to a JSON file."""
104
+ print(f"\nSaving detailed report to {file_path}...")
105
+ try:
106
+ # Create a report structure that is clean and JSON-friendly
107
+ report = {}
108
+ for model_name, result in self.results.items():
109
+
110
+ # Group predictions by event for a more organized report
111
+ predictions_by_event = {}
112
+ for p in result['predictions']:
113
+ event_name = p.pop('event') # Extract event and remove it from the sub-dictionary
114
+ if event_name not in predictions_by_event:
115
+ predictions_by_event[event_name] = []
116
+ predictions_by_event[event_name].append(p)
117
+
118
+ report[model_name] = {
119
+ "overall_accuracy": f"{result['accuracy']:.2f}%",
120
+ "total_fights_evaluated": result['total_fights'],
121
+ "predictions_by_event": predictions_by_event
122
+ }
123
+
124
+ with open(file_path, 'w', encoding='utf-8') as f:
125
+ json.dump(report, f, indent=4)
126
+ print("Report saved successfully.")
127
+ except (IOError, TypeError) as e:
128
+ print(f"Error saving report to JSON file: {e}")
129
+
130
  def _report_detailed_results(self):
131
+ """Prints a summary and saves the detailed report to a file."""
132
  print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
133
+ # A summary is printed to the console for convenience.
134
+ self._report_summary()
135
+ # The detailed report is now saved to a JSON file.
136
+ self._save_report_to_json()
 
 
 
 
 
 
 
src/scrape/main.py CHANGED
@@ -1,10 +1,10 @@
1
  import os
2
  import json
3
- from scrape_fights import scrape_all_events
4
- from scrape_fighters import scrape_all_fighters
5
- from to_csv import json_to_csv, fighters_json_to_csv
6
- from preprocess import preprocess_fighters_csv
7
- import config
8
 
9
  def main():
10
  """
 
1
  import os
2
  import json
3
+ from .scrape_fights import scrape_all_events
4
+ from .scrape_fighters import scrape_all_fighters
5
+ from .to_csv import json_to_csv, fighters_json_to_csv
6
+ from .preprocess import preprocess_fighters_csv
7
+ from .. import config
8
 
9
  def main():
10
  """
src/scrape/preprocess.py CHANGED
@@ -1,6 +1,6 @@
1
  import csv
2
  import os
3
- import config
4
 
5
  def convert_height_to_cm(height_str):
6
  """
 
1
  import csv
2
  import os
3
+ from .. import config
4
 
5
  def convert_height_to_cm(height_str):
6
  """
src/scrape/scrape_fighters.py CHANGED
@@ -5,7 +5,7 @@ import time
5
  import string
6
  import concurrent.futures
7
  import os
8
- import config
9
 
10
  # --- Configuration ---
11
  # The number of parallel threads to use for scraping fighter details.
 
5
  import string
6
  import concurrent.futures
7
  import os
8
+ from .. import config
9
 
10
  # --- Configuration ---
11
  # The number of parallel threads to use for scraping fighter details.
src/scrape/scrape_fights.py CHANGED
@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
3
  import json
4
  import time
5
  import concurrent.futures
6
- from config import EVENTS_JSON_PATH
7
 
8
  # --- Configuration ---
9
  # The number of parallel threads to use for scraping fight details.
 
3
  import json
4
  import time
5
  import concurrent.futures
6
+ from ..config import EVENTS_JSON_PATH
7
 
8
  # --- Configuration ---
9
  # The number of parallel threads to use for scraping fight details.
src/scrape/to_csv.py CHANGED
@@ -1,6 +1,6 @@
1
  import json
2
  import csv
3
- import config
4
 
5
  def json_to_csv(json_file_path, csv_file_path):
6
  try:
 
1
  import json
2
  import csv
3
+ from .. import config
4
 
5
  def json_to_csv(json_file_path, csv_file_path):
6
  try: