Spaces:
Sleeping
Sleeping
Alvaro
committed on
Commit
·
38c6a34
1
Parent(s):
e012a04
Refactor config imports and add JSON report for predictions
Browse files
Moved config.py to the project root and updated all imports to use the new location. Deleted the unused analyze.py script. Enhanced the prediction pipeline to save detailed model results to a JSON file, grouping predictions by event. Updated predict.main to generate a detailed report by default. Improved module import consistency in scrape modules.
- src/analysis/elo.py +1 -2
- src/analyze.py +0 -15
- src/{scrape/config.py → config.py} +4 -2
- src/main.py +1 -0
- src/predict/main.py +1 -1
- src/predict/pipeline.py +38 -13
- src/scrape/main.py +5 -5
- src/scrape/preprocess.py +1 -1
- src/scrape/scrape_fighters.py +1 -1
- src/scrape/scrape_fights.py +1 -1
- src/scrape/to_csv.py +1 -1
src/analysis/elo.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
import csv
|
2 |
import os
|
3 |
from datetime import datetime
|
4 |
-
import sys
|
5 |
|
6 |
-
from ..
|
7 |
|
8 |
# --- ELO Configuration ---
|
9 |
INITIAL_ELO = 1500
|
|
|
1 |
import csv
|
2 |
import os
|
3 |
from datetime import datetime
|
|
|
4 |
|
5 |
+
from ..config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
|
6 |
|
7 |
# --- ELO Configuration ---
|
8 |
INITIAL_ELO = 1500
|
src/analyze.py
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
|
3 |
-
ufc_fights = pd.read_csv('output/ufc_fights.csv')
|
4 |
-
ufc_fighters = pd.read_csv('output/ufc_fighters.csv')
|
5 |
-
|
6 |
-
print(f"Number of fighters registered in UFC: {len(ufc_fighters)}")
|
7 |
-
unique_fighters=set()
|
8 |
-
for fight in ufc_fights['fighter_1']:
|
9 |
-
unique_fighters.add(fight)
|
10 |
-
for fight in ufc_fights['fighter_2']:
|
11 |
-
unique_fighters.add(fight)
|
12 |
-
print(f"Number of fighters who have at least one fight: {len(unique_fighters)}")
|
13 |
-
|
14 |
-
highest_elo_fighters=ufc_fighters.sort_values(by='elo', ascending=False).head(20)
|
15 |
-
print(highest_elo_fighters)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/{scrape/config.py → config.py}
RENAMED
@@ -1,9 +1,11 @@
|
|
1 |
import os
|
2 |
|
3 |
OUTPUT_DIR = 'output'
|
|
|
|
|
|
|
4 |
|
|
|
5 |
EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
|
6 |
FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
|
7 |
|
8 |
-
FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
|
9 |
-
FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
|
|
|
1 |
import os
|
2 |
|
3 |
OUTPUT_DIR = 'output'
|
4 |
+
FIGHTS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.csv')
|
5 |
+
FIGHTERS_CSV_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.csv')
|
6 |
+
MODEL_RESULTS_PATH = os.path.join(OUTPUT_DIR, 'model_results.json')
|
7 |
|
8 |
+
# JSON files (temporary)
|
9 |
EVENTS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fights.json')
|
10 |
FIGHTERS_JSON_PATH = os.path.join(OUTPUT_DIR, 'ufc_fighters.json')
|
11 |
|
|
|
|
src/main.py
CHANGED
@@ -2,3 +2,4 @@
|
|
2 |
|
3 |
# Run scrape.main
|
4 |
# Run analysis.elo to add elo
|
|
|
|
2 |
|
3 |
# Run scrape.main
|
4 |
# Run analysis.elo to add elo
|
5 |
+
# Run predict.main for ML models
|
src/predict/main.py
CHANGED
@@ -24,7 +24,7 @@ def main():
|
|
24 |
pipeline = PredictionPipeline(models=models_to_run)
|
25 |
|
26 |
# Set detailed_report=False for a summary, or True for a full detailed report
|
27 |
-
pipeline.run(detailed_report=
|
28 |
|
29 |
if __name__ == '__main__':
|
30 |
main()
|
|
|
24 |
pipeline = PredictionPipeline(models=models_to_run)
|
25 |
|
26 |
# Set detailed_report=False for a summary, or True for a full detailed report
|
27 |
+
pipeline.run(detailed_report=True)
|
28 |
|
29 |
if __name__ == '__main__':
|
30 |
main()
|
src/predict/pipeline.py
CHANGED
@@ -3,7 +3,9 @@ import os
|
|
3 |
import sys
|
4 |
from datetime import datetime
|
5 |
from collections import OrderedDict
|
6 |
-
|
|
|
|
|
7 |
from .models import BaseModel
|
8 |
|
9 |
class PredictionPipeline:
|
@@ -61,6 +63,7 @@ class PredictionPipeline:
|
|
61 |
for fight in eval_fights:
|
62 |
f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
|
63 |
actual_winner = fight['winner']
|
|
|
64 |
predicted_winner = model.predict(f1_name, f2_name)
|
65 |
|
66 |
is_correct = (predicted_winner == actual_winner)
|
@@ -69,6 +72,7 @@ class PredictionPipeline:
|
|
69 |
|
70 |
predictions.append({
|
71 |
'fight': f"{f1_name} vs. {f2_name}",
|
|
|
72 |
'predicted_winner': predicted_winner,
|
73 |
'actual_winner': actual_winner,
|
74 |
'is_correct': is_correct
|
@@ -95,17 +99,38 @@ class PredictionPipeline:
|
|
95 |
print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
|
96 |
print("-" * 65)
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
def _report_detailed_results(self):
|
99 |
-
"""Prints a summary and detailed report
|
100 |
print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
for p in result['predictions']:
|
106 |
-
status = "CORRECT" if p['is_correct'] else "INCORRECT"
|
107 |
-
print(f" - Fight: {p['fight']}")
|
108 |
-
print(f" -> Predicted: {p['predicted_winner']}")
|
109 |
-
print(f" -> Actual: {p['actual_winner']}")
|
110 |
-
print(f" -> Result: {status}")
|
111 |
-
print("------------------------" + "-" * len(model_name))
|
|
|
3 |
import sys
|
4 |
from datetime import datetime
|
5 |
from collections import OrderedDict
|
6 |
+
import json
|
7 |
+
|
8 |
+
from ..config import FIGHTS_CSV_PATH, MODEL_RESULTS_PATH
|
9 |
from .models import BaseModel
|
10 |
|
11 |
class PredictionPipeline:
|
|
|
63 |
for fight in eval_fights:
|
64 |
f1_name, f2_name = fight['fighter_1'], fight['fighter_2']
|
65 |
actual_winner = fight['winner']
|
66 |
+
event_name = fight.get('event_name', 'Unknown Event')
|
67 |
predicted_winner = model.predict(f1_name, f2_name)
|
68 |
|
69 |
is_correct = (predicted_winner == actual_winner)
|
|
|
72 |
|
73 |
predictions.append({
|
74 |
'fight': f"{f1_name} vs. {f2_name}",
|
75 |
+
'event': event_name,
|
76 |
'predicted_winner': predicted_winner,
|
77 |
'actual_winner': actual_winner,
|
78 |
'is_correct': is_correct
|
|
|
99 |
print(f"{model_name:<25} | {result['accuracy']:<9.2f}% | {result['total_fights']:<20}")
|
100 |
print("-" * 65)
|
101 |
|
102 |
+
def _save_report_to_json(self, file_path=MODEL_RESULTS_PATH):
|
103 |
+
"""Saves the detailed prediction results to a JSON file."""
|
104 |
+
print(f"\nSaving detailed report to {file_path}...")
|
105 |
+
try:
|
106 |
+
# Create a report structure that is clean and JSON-friendly
|
107 |
+
report = {}
|
108 |
+
for model_name, result in self.results.items():
|
109 |
+
|
110 |
+
# Group predictions by event for a more organized report
|
111 |
+
predictions_by_event = {}
|
112 |
+
for p in result['predictions']:
|
113 |
+
event_name = p.pop('event') # Extract event and remove it from the sub-dictionary
|
114 |
+
if event_name not in predictions_by_event:
|
115 |
+
predictions_by_event[event_name] = []
|
116 |
+
predictions_by_event[event_name].append(p)
|
117 |
+
|
118 |
+
report[model_name] = {
|
119 |
+
"overall_accuracy": f"{result['accuracy']:.2f}%",
|
120 |
+
"total_fights_evaluated": result['total_fights'],
|
121 |
+
"predictions_by_event": predictions_by_event
|
122 |
+
}
|
123 |
+
|
124 |
+
with open(file_path, 'w', encoding='utf-8') as f:
|
125 |
+
json.dump(report, f, indent=4)
|
126 |
+
print("Report saved successfully.")
|
127 |
+
except (IOError, TypeError) as e:
|
128 |
+
print(f"Error saving report to JSON file: {e}")
|
129 |
+
|
130 |
def _report_detailed_results(self):
|
131 |
+
"""Prints a summary and saves the detailed report to a file."""
|
132 |
print("\n\n--- Prediction Pipeline Finished: Detailed Report ---")
|
133 |
+
# A summary is printed to the console for convenience.
|
134 |
+
self._report_summary()
|
135 |
+
# The detailed report is now saved to a JSON file.
|
136 |
+
self._save_report_to_json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/scrape/main.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
-
from scrape_fights import scrape_all_events
|
4 |
-
from scrape_fighters import scrape_all_fighters
|
5 |
-
from to_csv import json_to_csv, fighters_json_to_csv
|
6 |
-
from preprocess import preprocess_fighters_csv
|
7 |
-
import config
|
8 |
|
9 |
def main():
|
10 |
"""
|
|
|
1 |
import os
|
2 |
import json
|
3 |
+
from .scrape_fights import scrape_all_events
|
4 |
+
from .scrape_fighters import scrape_all_fighters
|
5 |
+
from .to_csv import json_to_csv, fighters_json_to_csv
|
6 |
+
from .preprocess import preprocess_fighters_csv
|
7 |
+
from .. import config
|
8 |
|
9 |
def main():
|
10 |
"""
|
src/scrape/preprocess.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import csv
|
2 |
import os
|
3 |
-
import config
|
4 |
|
5 |
def convert_height_to_cm(height_str):
|
6 |
"""
|
|
|
1 |
import csv
|
2 |
import os
|
3 |
+
from .. import config
|
4 |
|
5 |
def convert_height_to_cm(height_str):
|
6 |
"""
|
src/scrape/scrape_fighters.py
CHANGED
@@ -5,7 +5,7 @@ import time
|
|
5 |
import string
|
6 |
import concurrent.futures
|
7 |
import os
|
8 |
-
import config
|
9 |
|
10 |
# --- Configuration ---
|
11 |
# The number of parallel threads to use for scraping fighter details.
|
|
|
5 |
import string
|
6 |
import concurrent.futures
|
7 |
import os
|
8 |
+
from .. import config
|
9 |
|
10 |
# --- Configuration ---
|
11 |
# The number of parallel threads to use for scraping fighter details.
|
src/scrape/scrape_fights.py
CHANGED
@@ -3,7 +3,7 @@ from bs4 import BeautifulSoup
|
|
3 |
import json
|
4 |
import time
|
5 |
import concurrent.futures
|
6 |
-
from config import EVENTS_JSON_PATH
|
7 |
|
8 |
# --- Configuration ---
|
9 |
# The number of parallel threads to use for scraping fight details.
|
|
|
3 |
import json
|
4 |
import time
|
5 |
import concurrent.futures
|
6 |
+
from ..config import EVENTS_JSON_PATH
|
7 |
|
8 |
# --- Configuration ---
|
9 |
# The number of parallel threads to use for scraping fight details.
|
src/scrape/to_csv.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import json
|
2 |
import csv
|
3 |
-
import config
|
4 |
|
5 |
def json_to_csv(json_file_path, csv_file_path):
|
6 |
try:
|
|
|
1 |
import json
|
2 |
import csv
|
3 |
+
from .. import config
|
4 |
|
5 |
def json_to_csv(json_file_path, csv_file_path):
|
6 |
try:
|