Spaces:
No application file
No application file
Liss, Alex (NYC-HUG)
committed on
Commit
·
d73640d
1
Parent(s):
f4e73a3
capturing and integrating multimedia assets part 1
Browse files
- data/april_11_multimedia_data_collect/get_player_socials.py +239 -0
- data/april_11_multimedia_data_collect/get_youtube_playlist_videos.py +65 -0
- data/april_11_multimedia_data_collect/match_highlights.py +193 -0
- data/april_11_multimedia_data_collect/niners_players_headshots.csv +74 -0
- data/april_11_multimedia_data_collect/niners_players_headshots_with_socials_merged.csv +74 -0
- data/april_11_multimedia_data_collect/player_headshots.py +73 -0
- data/april_11_multimedia_data_collect/youtube_highlights.csv +0 -0
- data/april_11_multimedia_data_collect/z_old/niners_players_headshots_with_socials.csv +24 -0
- data/april_11_multimedia_data_collect/z_old/niners_players_headshots_with_socials_v1_safe.csv +51 -0
- data/{create_embeddings.py → z_old/create_embeddings.py} +0 -0
- data/{kml_cleanup.py → z_old/kml_cleanup.py} +0 -0
- data/{kmz_file_explorer.ipynb → z_old/kmz_file_explorer.ipynb} +0 -0
- data/{temp_unzipped → z_old/temp_unzipped}/doc.kml +0 -0
- data/{upload_embeddings.py → z_old/upload_embeddings.py} +0 -0
- data/{z_49ers_fan_chapters_DNU.csv → z_old/z_49ers_fan_chapters_DNU.csv} +0 -0
- data/{z_fan_chapters_clean_DNU.csv → z_old/z_fan_chapters_clean_DNU.csv} +0 -0
data/april_11_multimedia_data_collect/get_player_socials.py
ADDED
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import csv
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
import sys
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from pathlib import Path
|
8 |
+
|
9 |
+
# Pull variables from a local .env file into the process environment so the
# API key lookup below can find it.
load_dotenv()

# SerpAPI credential, read from the environment (None when it is not set).
SERP_API_KEY = os.environ.get("SERP_API_KEY")

# Absolute directory containing this script; relative CSV paths are resolved
# against it.
SCRIPT_DIR = Path(os.path.abspath(__file__)).parent
|
16 |
+
|
17 |
+
def _is_instagram_url(link):
    """Return True when *link*'s host is instagram.com or one of its subdomains."""
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import urlparse

    if not link:
        return False
    try:
        # Scheme-less links ("instagram.com/x") need a leading "//" for
        # urlparse to treat the first component as the host.
        parsed = urlparse(link if "://" in link else "//" + link)
    except ValueError:
        return False
    host = (parsed.hostname or "").lower()
    return host == "instagram.com" or host.endswith(".instagram.com")


def get_instagram_handle(query, timeout=10, retries=3, delay_between_retries=2):
    """
    Uses SerpAPI to search for query: e.g. 'Brock Purdy Instagram'
    Returns the first organic result whose host is instagram.com, else empty string.

    The function never raises for network or parsing problems; every failure
    path returns "" so callers can treat "" as "nothing found".

    Args:
        query: Search query string
        timeout: Request timeout in seconds
        retries: Number of attempts made before giving up (values <= 0 make
            no request and return "")
        delay_between_retries: Seconds to wait between retries

    Returns:
        The matching result URL as a string, or "" when nothing was found or
        all attempts failed.

    Raises:
        ValueError: If SERP_API_KEY is not configured.
    """
    if not SERP_API_KEY:
        raise ValueError("SERP_API_KEY environment variable not set or provided!")

    url = "https://serpapi.com/search"
    params = {
        "engine": "google",
        "q": query,
        "api_key": SERP_API_KEY,
    }

    for attempt in range(retries):
        try:
            print(f"[DEBUG] Sending API request for: {query}")
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            # Check if we have organic results
            if "organic_results" not in data:
                print(f"[WARNING] No organic_results found in API response for {query}")
                print(f"[DEBUG] Response keys: {list(data.keys())}")
                return ""

            results = data["organic_results"]
            print(f"[DEBUG] Found {len(results)} organic results")

            for r in results:
                link = r.get("link") or ""
                # Compare the parsed host rather than a substring so that
                # look-alike domains or URLs that merely mention
                # instagram.com in their path/query are not accepted.
                if _is_instagram_url(link):
                    print(f"[DEBUG] Found Instagram link: {link}")
                    return link

            print(f"[WARNING] No Instagram links found for {query}")
            return ""

        except requests.exceptions.RequestException as e:
            # Timeout is a RequestException subclass; both share the same
            # retry policy and differ only in the message printed.
            if isinstance(e, requests.exceptions.Timeout):
                print(f"[ERROR] Request timed out for {query} (attempt {attempt+1}/{retries})")
            else:
                # The exception text can embed the full request URL, which
                # carries the api_key query parameter; keep it out of logs.
                reason = str(e).replace(SERP_API_KEY, "***")
                print(f"[ERROR] Request failed for {query}: {reason} (attempt {attempt+1}/{retries})")

            if attempt < retries - 1:
                print(f"[INFO] Retrying in {delay_between_retries} seconds...")
                time.sleep(delay_between_retries)
            else:
                print(f"[ERROR] All retries failed for {query}")
                return ""

        except Exception as e:
            print(f"[ERROR] Unexpected error for {query}: {str(e)}")
            return ""

    # Reached only when retries <= 0: keep the documented "" contract
    # instead of falling through and returning None.
    return ""
|
86 |
+
|
87 |
+
def _write_social_rows(output_csv, fieldnames, rows):
    """Rewrite *output_csv* from scratch with a header line and every row in *rows*."""
    with open(output_csv, 'w', newline='', encoding='utf-8') as f_out:
        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def enrich_niners_socials(input_csv='niners_players_headshots.csv',
                          output_csv='niners_players_headshots_with_socials.csv',
                          delay_between_requests=1,
                          start_player=None,
                          max_players=None):
    """
    Reads the roster CSV, queries Instagram for each player's best match,
    then writes the results to a new CSV.

    Rows already present in the output CSV are loaded first and kept, so an
    interrupted run can be resumed. The output file is rewritten after every
    processed player.

    Args:
        input_csv: Path to input CSV file (relative paths are resolved
            against the script directory)
        output_csv: Path to output CSV file (relative paths are resolved
            against the script directory)
        delay_between_requests: Seconds to wait between API requests to avoid rate limiting
        start_player: Player number to start processing from (1-indexed).
            When None, processing starts after the rows already in the output.
        max_players: Maximum number of players to process (None for all)

    Returns:
        None. Results are written to output_csv and progress is printed.
    """
    # Convert relative paths to absolute paths based on script directory
    if not os.path.isabs(input_csv):
        input_csv = os.path.join(SCRIPT_DIR, input_csv)
    if not os.path.isabs(output_csv):
        output_csv = os.path.join(SCRIPT_DIR, output_csv)

    print(f"[INFO] Input CSV path: {input_csv}")
    print(f"[INFO] Output CSV path: {output_csv}")
    if not SERP_API_KEY:
        print("[ERROR] SERP_API_KEY not set. Please set your environment variable or update the script.")
        return

    # Check if input file exists
    if not os.path.exists(input_csv):
        print(f"[ERROR] Input file not found: {input_csv}")
        return

    # Bound before the try so the fallback save in the except handler can
    # always tell whether there is anything (and any header) to write.
    existing_data = []
    output_fieldnames = None

    try:
        # Read existing output CSV if it exists to continue from where we left off
        if os.path.exists(output_csv):
            with open(output_csv, 'r', encoding='utf-8') as f_existing:
                existing_data = list(csv.DictReader(f_existing))
            print(f"[INFO] Loaded {len(existing_data)} existing entries")

        # Set lookup instead of rescanning every saved row for every player.
        processed_names = {saved.get('name') for saved in existing_data}

        # Read the roster once; it provides both the total and the rows.
        with open(input_csv, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            input_fieldnames = list(reader.fieldnames or [])
            roster = list(reader)

        output_fieldnames = list(input_fieldnames)
        if 'instagram_url' not in output_fieldnames:
            output_fieldnames.append('instagram_url')

        total_players = len(roster)
        print(f"[INFO] Total players: {total_players}")

        # Determine start and end points
        if start_player is not None:
            start_index = max(0, start_player - 1)
        else:
            start_index = len(existing_data)
        end_index = min(total_players, start_index + (max_players or total_players))

        print(f"[INFO] Will process players from {start_index + 1} to {end_index}")

        # Slicing tolerates a start beyond the end of the roster (nothing is
        # processed) instead of failing while skipping rows.
        for i, row in enumerate(roster[start_index:end_index], start_index + 1):
            player_name = row['name']
            print(f"[INFO] Processing player {i}/{end_index}: {player_name}")

            # Skip if already processed
            if player_name in processed_names:
                print(f"[INFO] {player_name} already processed. Skipping.")
                continue

            # Construct a query like 'PLAYER NAME instagram'
            query = f"{player_name} NFL 49ers instagram"

            try:
                insta_url = get_instagram_handle(query)
                row['instagram_url'] = insta_url

                # Print result
                if insta_url:
                    print(f"[SUCCESS] Found Instagram for {player_name}: {insta_url}")
                else:
                    print(f"[WARNING] No Instagram found for {player_name}")

                # Append new data
                existing_data.append(row)
                processed_names.add(player_name)

                # Save progress after each player
                _write_social_rows(output_csv, output_fieldnames, existing_data)

                # Add delay between requests to avoid rate limiting
                if i < end_index:
                    print(f"[INFO] Waiting {delay_between_requests} seconds before next request...")
                    time.sleep(max(0, delay_between_requests))

            except KeyboardInterrupt:
                print("\n[INFO] Process interrupted by user. Saving progress...")
                # The interrupt may have landed mid-write; rewrite the file so
                # it holds every row collected so far.
                if existing_data:
                    _write_social_rows(output_csv, output_fieldnames, existing_data)
                break
        else:
            # Loop ran to completion; report when max_players cut it short.
            if end_index < total_players:
                print(f"[INFO] Reached maximum number of players. Stopping.")

        print(f"[INFO] Social data saved to {output_csv}")
        print(f"[INFO] Processed {len(existing_data)}/{total_players} players")

    except Exception as e:
        print(f"[ERROR] An unexpected error occurred: {str(e)}")
        # Try to save any data collected so far
        if existing_data and output_fieldnames is not None:
            try:
                _write_social_rows(output_csv, output_fieldnames, existing_data)
                print(f"[INFO] Partial data saved to {output_csv}")
            except Exception as save_error:
                print(f"[ERROR] Failed to save partial data: {save_error}")
|
212 |
+
|
213 |
+
if __name__ == "__main__":
    print("[INFO] Starting player social media enrichment script")

    # Positional command line arguments: [delay_seconds] [start_player]
    delay = 1  # Default delay
    start_player = 51  # Default to start from 51st player
    max_players = None  # Process all remaining players

    if len(sys.argv) > 1:
        try:
            requested_delay = float(sys.argv[1])
            # Reject negative, NaN and infinite values here; they would
            # otherwise only fail later when passed to time.sleep().
            if not 0 <= requested_delay < float("inf"):
                raise ValueError(sys.argv[1])
            delay = requested_delay
            print(f"[INFO] Using custom delay between requests: {delay} seconds")
        except ValueError:
            print(f"[WARNING] Invalid delay value: {sys.argv[1]}. Using default: 1 second")

    if len(sys.argv) > 2:
        try:
            requested_start = int(sys.argv[2])
            # Player numbers are 1-indexed, so zero and negatives are invalid.
            if requested_start < 1:
                raise ValueError(sys.argv[2])
            start_player = requested_start
            print(f"[INFO] Will start processing from player {start_player}")
        except ValueError:
            print(f"[WARNING] Invalid start_player value: {sys.argv[2]}. Using default: 51")

    enrich_niners_socials(
        delay_between_requests=delay,
        start_player=start_player,
        max_players=max_players
    )
|
data/april_11_multimedia_data_collect/get_youtube_playlist_videos.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import csv
|
3 |
+
from googleapiclient.discovery import build
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
# Pull variables from a local .env file into the process environment so the
# API key lookup below can find it.
load_dotenv()

# YouTube Data API credential, read from the environment (None when unset).
API_KEY = os.environ.get("YOUTUBE_API_KEY")

# Playlist exported by default (described in the original as an example
# 49ers highlights playlist).
PLAYLIST_ID = "PLBB205pkCsyvZ6tjCh_m5s21D0eeYJ8Ly"
|
13 |
+
|
14 |
+
def get_youtube_videos(playlist_id=PLAYLIST_ID, output_csv='youtube_highlights.csv'):
    """
    Export every item of a YouTube playlist to a CSV file.

    Pages through the playlist 50 items at a time and records, for each item,
    the video ID, title, description, the snippet's publishedAt value and a
    watch URL built from the video ID.

    Args:
        playlist_id: ID of the playlist to read.
        output_csv: Path of the CSV file to (over)write.

    Raises:
        ValueError: If YOUTUBE_API_KEY is not configured.
    """
    if not API_KEY:
        raise ValueError("YOUTUBE_API_KEY environment variable not set or provided!")

    client = build('youtube', 'v3', developerKey=API_KEY)

    columns = ["video_id", "title", "description", "published_at", "video_url"]
    rows = []
    next_token = None  # None requests the first page
    has_more_pages = True

    while has_more_pages:
        page = client.playlistItems().list(
            part="snippet",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_token
        ).execute()

        for entry in page['items']:
            info = entry['snippet']
            name = info['title']
            summary = info['description']
            vid = info['resourceId']['videoId']
            # NOTE(review): this is the playlist item's publishedAt; confirm
            # against the API docs that it is the date wanted downstream.
            stamp = info['publishedAt']

            rows.append({
                "video_id": vid,
                "title": name,
                "description": summary,
                "published_at": stamp,
                "video_url": f"https://www.youtube.com/watch?v={vid}"
            })

        # A missing or empty nextPageToken marks the last page.
        next_token = page.get('nextPageToken')
        has_more_pages = bool(next_token)

    # Write to CSV
    with open(output_csv, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(rows)

    print(f"[INFO] YouTube playlist data saved to {output_csv}")
|
63 |
+
|
64 |
+
# Script entry point: export the default playlist to the default CSV path.
if __name__ == "__main__":
    get_youtube_videos()
|
data/april_11_multimedia_data_collect/match_highlights.py
ADDED
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import re
|
3 |
+
from collections import defaultdict
|
4 |
+
|
5 |
+
# Define file paths
# NOTE(review): these are relative paths, so they resolve against the current
# working directory at run time — confirm the directory the script is meant
# to be launched from.

# Inputs read by the load_* functions.
YOUTUBE_HIGHLIGHTS_PATH = "ifx-sandbox/data/april_11_multimedia_data_collect/youtube_highlights.csv"
PLAYERS_ROSTER_PATH = "ifx-sandbox/data/april_11_multimedia_data_collect/niners_players_headshots_with_socials_merged.csv"
GAMES_SCHEDULE_PATH = "ifx-sandbox/data/nfl-2024-san-francisco-49ers-with-results.csv"
# Outputs written by the save_* functions.
OUTPUT_PLAYERS_PATH = "ifx-sandbox/data/niners_output/players_with_highlights.csv"
OUTPUT_GAMES_PATH = "ifx-sandbox/data/niners_output/games_with_highlights.csv"
OUTPUT_TEAM_VIDEOS_PATH = "ifx-sandbox/data/niners_output/team_highlights.csv"
|
12 |
+
|
13 |
+
def load_youtube_highlights():
    """Read the YouTube highlights CSV and return its rows as a list of dicts.

    Each dict holds exactly the video_id, title, description, published_at
    and video_url columns; any other columns in the file are dropped.
    """
    wanted_columns = ('video_id', 'title', 'description', 'published_at', 'video_url')
    with open(YOUTUBE_HIGHLIGHTS_PATH, 'r', encoding='utf-8') as handle:
        return [
            {column: record[column] for column in wanted_columns}
            for record in csv.DictReader(handle)
        ]
|
27 |
+
|
28 |
+
def load_players():
    """Read the roster CSV and return one dict per player.

    Each dict carries the name, headshot_url and instagram_url columns plus a
    highlight_video_url key that starts out as an empty string.
    """
    copied_columns = ('name', 'headshot_url', 'instagram_url')
    roster = []
    with open(PLAYERS_ROSTER_PATH, 'r', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            entry = {column: record[column] for column in copied_columns}
            entry['highlight_video_url'] = ''  # filled in later by the matcher
            roster.append(entry)
    return roster
|
41 |
+
|
42 |
+
def load_games():
    """Read the schedule CSV and return one dict per game.

    Besides the copied schedule columns, each dict gets an 'opponent' key
    (the team that is not 'San Francisco 49ers') and a highlight_video_url
    key that starts out as an empty string.
    """
    niners = 'San Francisco 49ers'
    # (output key, CSV column) pairs, in output order.
    column_map = (
        ('match_number', 'Match Number'),
        ('round_number', 'Round Number'),
        ('date', 'Date'),
        ('location', 'Location'),
        ('home_team', 'Home Team'),
        ('away_team', 'Away Team'),
        ('result', 'Result'),
        ('game_result', 'game_result'),
    )

    schedule = []
    with open(GAMES_SCHEDULE_PATH, 'r', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            if record['Home Team'] == niners:
                rival = record['Away Team']
            else:
                rival = record['Home Team']
            rival = rival.replace(niners, '').strip()

            entry = {key: record[column] for key, column in column_map}
            entry['opponent'] = rival
            entry['highlight_video_url'] = ''  # filled in later by the matcher
            schedule.append(entry)
    return schedule
|
64 |
+
|
65 |
+
# Generational suffixes that are not a player's surname ("Robert Beal Jr.").
_NAME_SUFFIXES = frozenset({'jr', 'sr', 'ii', 'iii', 'iv'})


def _whole_token_pattern(text):
    """Compile a case-insensitive pattern matching *text* as a whole token.

    Lookarounds are used instead of \\b because \\b cannot match after a
    trailing non-word character such as the "." in "Jr.".
    """
    return re.compile(r'(?<!\w)' + re.escape(text) + r'(?!\w)', re.IGNORECASE)


def _surname(full_name):
    """Return the last word of *full_name*, ignoring generational suffixes."""
    parts = full_name.split()
    while len(parts) > 1 and parts[-1].rstrip('.').lower() in _NAME_SUFFIXES:
        parts.pop()
    return parts[-1] if parts else ''


def _first_unassigned_match(highlights, assigned_ids, predicates):
    """Return the first unassigned highlight accepted by the earliest predicate.

    Predicates are tried in order over the whole list, so a more specific
    predicate always wins over a less specific one regardless of video order.
    Returns None when no predicate accepts any unassigned title.
    """
    for accepts in predicates:
        for highlight in highlights:
            if highlight['video_id'] in assigned_ids:
                continue
            if accepts(highlight['title']):
                return highlight
    return None


def match_highlights_to_players_and_games(highlights, players, games):
    """Match YouTube highlights to players and games.

    Players are matched first, then games; each video is assigned at most
    once. Matching looks at video titles only.

    For a player, a title containing the full name is preferred; only if no
    unassigned title has the full name is a surname-only match used. For a
    game, a title containing both "Week <round_number>" and the opponent is
    preferred over a title containing only the opponent.

    Args:
        highlights: List of dicts with at least 'video_id', 'title' and
            'video_url' keys.
        players: List of dicts with a 'name' key; the matched URL is stored
            in place under 'highlight_video_url'.
        games: List of dicts with 'opponent' and 'round_number' keys; the
            matched URL is stored in place under 'highlight_video_url'.

    Returns:
        The highlights that were not assigned to any player or game, in
        their original order.
    """
    assigned_video_ids = set()

    # Match players first
    for player in players:
        player_name = player['name'].strip()
        if not player_name:
            # Nothing to search for; leave the player unmatched.
            continue

        predicates = [_whole_token_pattern(player_name).search]
        surname = _surname(player_name)
        if surname:
            predicates.append(_whole_token_pattern(surname).search)

        found = _first_unassigned_match(highlights, assigned_video_ids, predicates)
        if found is not None:
            player['highlight_video_url'] = found['video_url']
            assigned_video_ids.add(found['video_id'])

    # Match games next
    for game in games:
        opponent = game['opponent'].strip()
        if not opponent:
            # An empty opponent would produce a pattern matching any title.
            continue

        opponent_search = _whole_token_pattern(opponent).search
        predicates = []

        round_number = str(game['round_number']).strip()
        if round_number:
            week_search = re.compile(
                r'\bWeek\s+' + re.escape(round_number) + r'\b', re.IGNORECASE
            ).search
            # Defaults bind this iteration's patterns to the lambda.
            predicates.append(
                lambda title, week=week_search, opp=opponent_search:
                bool(week(title) and opp(title))
            )
        predicates.append(opponent_search)

        found = _first_unassigned_match(highlights, assigned_video_ids, predicates)
        if found is not None:
            game['highlight_video_url'] = found['video_url']
            assigned_video_ids.add(found['video_id'])

    # Collect team videos (unassigned highlights)
    return [
        highlight for highlight in highlights
        if highlight['video_id'] not in assigned_video_ids
    ]
|
136 |
+
|
137 |
+
def save_players_with_highlights(players):
    """Write the player dicts, one CSV row each, to OUTPUT_PLAYERS_PATH.

    The file is overwritten and starts with a header row.
    """
    columns = ['name', 'headshot_url', 'instagram_url', 'highlight_video_url']
    with open(OUTPUT_PLAYERS_PATH, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(players)
|
145 |
+
|
146 |
+
def save_games_with_highlights(games):
    """Write the game dicts, one CSV row each, to OUTPUT_GAMES_PATH.

    The file is overwritten and starts with a header row.
    """
    columns = [
        'match_number', 'round_number', 'date', 'location',
        'home_team', 'away_team', 'result', 'game_result',
        'opponent', 'highlight_video_url',
    ]
    with open(OUTPUT_GAMES_PATH, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(games)
|
155 |
+
|
156 |
+
def save_team_videos(team_videos):
    """Write the unassigned highlight dicts to OUTPUT_TEAM_VIDEOS_PATH.

    The file is overwritten and starts with a header row.
    """
    columns = ['video_id', 'title', 'description', 'published_at', 'video_url']
    with open(OUTPUT_TEAM_VIDEOS_PATH, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(team_videos)
|
164 |
+
|
165 |
+
def main():
    """Load the inputs, match highlights, write the three output CSVs and print a summary."""
    # Load data
    highlights = load_youtube_highlights()
    players = load_players()
    games = load_games()

    # Matching fills 'highlight_video_url' on players and games in place and
    # returns the videos nobody claimed.
    team_videos = match_highlights_to_players_and_games(highlights, players, games)

    # Save results
    save_players_with_highlights(players)
    save_games_with_highlights(games)
    save_team_videos(team_videos)

    # Print summary
    matched_players = len([p for p in players if p['highlight_video_url']])
    matched_games = len([g for g in games if g['highlight_video_url']])

    print(f"Total YouTube highlights: {len(highlights)}")
    print(f"Players with highlight videos: {matched_players}/{len(players)}")
    print(f"Games with highlight videos: {matched_games}/{len(games)}")
    print(f"Team videos (unassigned): {len(team_videos)}")
    print(f"\nOutput files created:")
    for written_path in (OUTPUT_PLAYERS_PATH, OUTPUT_GAMES_PATH, OUTPUT_TEAM_VIDEOS_PATH):
        print(f"- {written_path}")
|
191 |
+
|
192 |
+
# Script entry point: run the full load / match / save pipeline.
if __name__ == "__main__":
    main()
|
data/april_11_multimedia_data_collect/niners_players_headshots.csv
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name,headshot_url
|
2 |
+
Israel Abanikanda,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wo7d9oli06eki4mnh3i8.png
|
3 |
+
Brandon Aiyuk,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/khwofxjjwx0hcaigzxhw.png
|
4 |
+
Isaac Alarcon,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mlhuuxukyusodzlfsmnv.jpg
|
5 |
+
Evan Anderson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/ng7oamywxvqgkx6l6kqc.png
|
6 |
+
Tre Avery,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/a7kfv7xjftqlaqghk6sg
|
7 |
+
Alex Barrett,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/bm0ay22de39d1enrxwiq.jpg
|
8 |
+
Ben Bartch,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/aqaslodzr7y0yvh5zzxa.jpg
|
9 |
+
Robert Beal Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/jwwhmt5d8mi0vdb8nfic.jpg
|
10 |
+
Tatum Bethune,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/vl08pinqpmoubdf0zy5s.png
|
11 |
+
Nick Bosa,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/utiwswqvpkiwtocijwhz.jpg
|
12 |
+
Jake Brendel,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/svsb41aekpzt3m9snilw.jpg
|
13 |
+
Ji'Ayir Brown,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/urillpic02z774n09xvf.jpg
|
14 |
+
Tre Brown,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/dpemqrrweakt8dci3qfb
|
15 |
+
Spencer Burford,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/lje3ae25dntkdudp6eex.jpg
|
16 |
+
Jacob Cowing,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/lg7aao0umc21oioufqdx.png
|
17 |
+
Kalia Davis,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/rmnxj3sh7pyldmcxqe32.jpg
|
18 |
+
Jordan Elliott,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/xbyky8r2yuzusd2tmrw8.jpg
|
19 |
+
Luke Farrell,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/f2z7wpmx7ngtxcqqedla
|
20 |
+
Russell Gage Jr.,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/lkqhshv0dss1b9c6mdnj
|
21 |
+
Jonathan Garvin,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/rapfcxut6vu50vcevswe.png
|
22 |
+
Luke Gifford,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/mhdbbzj8amttnpd1nbpn
|
23 |
+
Kevin Givens,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mstmgft0e0ancdzspboy.jpg
|
24 |
+
Jalen Graham,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/pbl2a1ujopvwqrfct0jp.jpg
|
25 |
+
Richie Grant,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/szeswtvt6jmbu3so3phd
|
26 |
+
Renardo Green,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/v79obx9v7tgcjjlo6hiy.png
|
27 |
+
Yetur Gross-Matos,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/etuaajmvhbc5qkebgoow.jpg
|
28 |
+
Isaac Guerendo,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/b66rpzr9iauo5rdprvka.png
|
29 |
+
Sebastian Gutierrez,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/am9sywgkga6jq65hvboe.jpg
|
30 |
+
Matt Hennessy,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/zk8b21o8ncxnyu0gyf23
|
31 |
+
Isaiah Hodgins,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ax1oft9kqida0eokvtes.jpg
|
32 |
+
Drake Jackson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/y2luyplzpvbzokyfbmla.jpg
|
33 |
+
Tarron Jackson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/pnqrjp76bgpkmacxma3r
|
34 |
+
Jauan Jennings,https://static.clubs.nfl.com/image/private/t_thumb_squared_2x/f_auto/49ers/wxsq7f4ajmhfs6tn4dg2.jpg
|
35 |
+
Quindell Johnson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/uga90lawcfxjcqna7opb
|
36 |
+
Zack Johnson,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/n4hy8uzhcl5cl0ricwoa
|
37 |
+
Mac Jones,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/pedpdxybeus7mrovsoko
|
38 |
+
Kyle Juszczyk,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ywdz6y2pfzndqgmxxfbj.jpg
|
39 |
+
George Kittle,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/elheepobwn1ahqwtfwat.jpg
|
40 |
+
Deommodore Lenoir,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/f9fnuvbpcxku9ibt9qs8.jpg
|
41 |
+
Chase Lucas,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/gjeejt5pbagnipodhdz4.jpg
|
42 |
+
Darrell Luter Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/g5rohvooet9g5w7rlhrh.jpg
|
43 |
+
Jaylen Mahoney,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/yv9inbia05nyxppuajv0.png
|
44 |
+
Christian McCaffrey,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/a8fka6shomakkbllljgt.jpg
|
45 |
+
Jalen McKenzie,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/gffxpns1ayxyjccymr6d.jpg
|
46 |
+
Colton McKivitz,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/jugvoxjabgsbcfbuqfew.jpg
|
47 |
+
Jake Moody,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ygputwsbutemszr8xxkw.jpg
|
48 |
+
Tanner Mordecai,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/y8gipodnkeapgmegnxs1.png
|
49 |
+
Malik Mustapha,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/eyrgxgpbrycd9x8glk0j.png
|
50 |
+
Siran Neal,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/muhthfs6owkkpsyop1e6
|
51 |
+
Drake Nugent,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/qyb4kurtbv9uflmupfnc.png
|
52 |
+
George Odum,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/sqpxhoycdpegkyjn6ooc.jpg
|
53 |
+
Sam Okuayinonu,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/fyolr2zk2nplfbdze75l.jpg
|
54 |
+
Terique Owens,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/okhin0uwdon2nimvbtwd.png
|
55 |
+
Ricky Pearsall,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/to7q7w4kjiajseb4ljcx.png
|
56 |
+
Jason Pinnock,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/on29awacb9frijyggtgt
|
57 |
+
Austen Pleasants,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wsbs5emdyzuc1sudbcls.png
|
58 |
+
Mason Pline,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/mvjlaxpu8bu33ohspqot.png
|
59 |
+
Dominick Puni,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/tq1snozjpjrgrjoflrfg.png
|
60 |
+
Brock Purdy,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wt42ykvuxpngm4m1axxn.png
|
61 |
+
Curtis Robinson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/x3xyzgeapcafr0gicl5y.jpg
|
62 |
+
Demarcus Robinson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/lakf0xue1qqb7ed4p6ge
|
63 |
+
Patrick Taylor Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/hochjncae0hqcoveuexq.jpg
|
64 |
+
Trent Taylor,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/j8lom4fnsveujt8hykef.jpg
|
65 |
+
Tre Tomlinson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/n5pfv126xw0psc0d1ydz
|
66 |
+
Jake Tonges,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/la3z5y6u7tix6rnq2m5l.jpg
|
67 |
+
Fred Warner,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/zo4ftfar4bshrbipceuk.jpg
|
68 |
+
Jon Weeks,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/d9fvm74pu4vyinveopbf
|
69 |
+
DaShaun White,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/mjnpmkw3ar6zcj2hxxzd
|
70 |
+
Trent Williams,https://static.clubs.nfl.com/image/private/t_thumb_squared_2x/f_auto/49ers/bnq8i5urjualxre5caqz.jpg
|
71 |
+
Brayden Willis,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/xmo7hsuho3ehmsjwvthc.jpg
|
72 |
+
Dee Winters,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ggf13riajo0kn0y6kbu0.jpg
|
73 |
+
Mitch Wishnowsky,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mkf1xr1x8nr9l55oq72a.jpg
|
74 |
+
Nick Zakelj,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/o92tva22zezdz4aksadl.jpg
|
data/april_11_multimedia_data_collect/niners_players_headshots_with_socials_merged.csv
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name,headshot_url,instagram_url,
|
2 |
+
Israel Abanikanda,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wo7d9oli06eki4mnh3i8.png,https://www.instagram.com/izzygetsbusy__/?hl=en,
|
3 |
+
Brandon Aiyuk,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/khwofxjjwx0hcaigzxhw.png,https://www.instagram.com/brandonaiyuk/?hl=en,
|
4 |
+
Isaac Alarcon,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mlhuuxukyusodzlfsmnv.jpg,https://www.instagram.com/isaac_algar/?hl=en,
|
5 |
+
Evan Anderson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/ng7oamywxvqgkx6l6kqc.png,https://www.instagram.com/klamps8/?hl=en,
|
6 |
+
Tre Avery,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/a7kfv7xjftqlaqghk6sg,https://www.instagram.com/t.avery21/?hl=en,
|
7 |
+
Alex Barrett,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/bm0ay22de39d1enrxwiq.jpg,https://www.instagram.com/alex.barrett/?hl=en,
|
8 |
+
Ben Bartch,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/aqaslodzr7y0yvh5zzxa.jpg,https://www.instagram.com/bartchben/,
|
9 |
+
Robert Beal Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/jwwhmt5d8mi0vdb8nfic.jpg,https://www.instagram.com/oursf49ers/reel/C_CVQxxp2ti/,
|
10 |
+
Tatum Bethune,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/vl08pinqpmoubdf0zy5s.png,https://www.instagram.com/tatumx15/?hl=en,
|
11 |
+
Nick Bosa,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/utiwswqvpkiwtocijwhz.jpg,https://www.instagram.com/nbsmallerbear/?hl=en,
|
12 |
+
Jake Brendel,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/svsb41aekpzt3m9snilw.jpg,https://www.instagram.com/jake.brendel/?hl=en,
|
13 |
+
Ji'Ayir Brown,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/urillpic02z774n09xvf.jpg,https://www.instagram.com/_tiig/?hl=en,
|
14 |
+
Tre Brown,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/dpemqrrweakt8dci3qfb,https://www.instagram.com/tre_brown25/?hl=en,
|
15 |
+
Spencer Burford,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/lje3ae25dntkdudp6eex.jpg,https://www.instagram.com/spence__74/?hl=en,
|
16 |
+
Jacob Cowing,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/lg7aao0umc21oioufqdx.png,https://www.instagram.com/jaycowing_/?hl=en,
|
17 |
+
Kalia Davis,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/rmnxj3sh7pyldmcxqe32.jpg,https://www.instagram.com/ucf.football/p/C3No6rTugDe/,
|
18 |
+
Jordan Elliott,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/xbyky8r2yuzusd2tmrw8.jpg,https://www.instagram.com/jordanelliott_nbcs/,
|
19 |
+
Luke Farrell,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/f2z7wpmx7ngtxcqqedla,https://www.instagram.com/lukefarrell89/?hl=en,
|
20 |
+
Russell Gage Jr.,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/lkqhshv0dss1b9c6mdnj,https://www.instagram.com/w8k3mupruss/?hl=en,
|
21 |
+
Jonathan Garvin,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/rapfcxut6vu50vcevswe.png,https://www.instagram.com/thesfniners/p/DCmgF8KSw2A/?hl=en,
|
22 |
+
Luke Gifford,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/mhdbbzj8amttnpd1nbpn,https://www.instagram.com/luke_gifford/?hl=en,
|
23 |
+
Kevin Givens,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mstmgft0e0ancdzspboy.jpg,https://www.instagram.com/49ers/p/DAg_Pvpz1vV/,
|
24 |
+
Jalen Graham,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/pbl2a1ujopvwqrfct0jp.jpg,https://www.instagram.com/thexniners/p/CruR8IPrSV7/,
|
25 |
+
Richie Grant,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/szeswtvt6jmbu3so3phd,https://www.instagram.com/richiegrant_/?hl=en,
|
26 |
+
Renardo Green,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/v79obx9v7tgcjjlo6hiy.png,https://www.instagram.com/dondada.8/?hl=en,
|
27 |
+
Yetur Gross-Matos,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/etuaajmvhbc5qkebgoow.jpg,https://www.instagram.com/__lobo99/?hl=en,
|
28 |
+
Isaac Guerendo,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/b66rpzr9iauo5rdprvka.png,https://www.instagram.com/isaac_guerendo/?hl=en,
|
29 |
+
Sebastian Gutierrez,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/am9sywgkga6jq65hvboe.jpg,https://www.instagram.com/sebastiandev1/?hl=en,
|
30 |
+
Matt Hennessy,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/zk8b21o8ncxnyu0gyf23,https://www.instagram.com/matt___hennessy/?hl=en,
|
31 |
+
Isaiah Hodgins,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ax1oft9kqida0eokvtes.jpg,https://www.instagram.com/isaiahhodgins/?hl=en,
|
32 |
+
Drake Jackson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/y2luyplzpvbzokyfbmla.jpg,https://www.instagram.com/thefreak/?hl=en,
|
33 |
+
Tarron Jackson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/pnqrjp76bgpkmacxma3r,https://www.instagram.com/tarron_jackson/?hl=en,
|
34 |
+
Jauan Jennings,https://static.clubs.nfl.com/image/private/t_thumb_squared_2x/f_auto/49ers/wxsq7f4ajmhfs6tn4dg2.jpg,https://www.instagram.com/u_aintjj/?hl=en,
|
35 |
+
Quindell Johnson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/uga90lawcfxjcqna7opb,https://www.instagram.com/p/DFGnwNlymc9/,
|
36 |
+
Zack Johnson,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/n4hy8uzhcl5cl0ricwoa,https://www.instagram.com/zack.johnson.68/,
|
37 |
+
Mac Jones,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/pedpdxybeus7mrovsoko,https://www.instagram.com/macjones_10/?hl=en,
|
38 |
+
Kyle Juszczyk,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ywdz6y2pfzndqgmxxfbj.jpg,https://www.instagram.com/juicecheck44/?hl=en,
|
39 |
+
George Kittle,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/elheepobwn1ahqwtfwat.jpg,https://www.instagram.com/gkittle/?hl=en,
|
40 |
+
Deommodore Lenoir,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/f9fnuvbpcxku9ibt9qs8.jpg,https://www.instagram.com/deommo.lenoir/?hl=en,
|
41 |
+
Chase Lucas,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/gjeejt5pbagnipodhdz4.jpg,https://www.instagram.com/chase_lucas24/?hl=en,
|
42 |
+
Darrell Luter Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/g5rohvooet9g5w7rlhrh.jpg,https://www.instagram.com/_d.ray4k/?hl=en,
|
43 |
+
Jaylen Mahoney,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/yv9inbia05nyxppuajv0.png,https://www.instagram.com/jaylenmahoney_/,
|
44 |
+
Christian McCaffrey,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/a8fka6shomakkbllljgt.jpg,https://www.instagram.com/christianmccaffrey/?hl=en,
|
45 |
+
Jalen McKenzie,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/gffxpns1ayxyjccymr6d.jpg,https://www.instagram.com/jay_peez70/?hl=en,
|
46 |
+
Colton McKivitz,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/jugvoxjabgsbcfbuqfew.jpg,https://www.instagram.com/cmckivitz53/?hl=en,
|
47 |
+
Jake Moody,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ygputwsbutemszr8xxkw.jpg,https://www.instagram.com/jmoods_/?hl=en,
|
48 |
+
Tanner Mordecai,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/y8gipodnkeapgmegnxs1.png,https://www.instagram.com/t_mordecai/?hl=en,
|
49 |
+
Malik Mustapha,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/eyrgxgpbrycd9x8glk0j.png,https://www.instagram.com/stapha/,
|
50 |
+
Siran Neal,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/muhthfs6owkkpsyop1e6,https://www.instagram.com/siranneal/?hl=en,
|
51 |
+
Drake Nugent,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/qyb4kurtbv9uflmupfnc.png,https://www.instagram.com/drakenugent9/?hl=en,
|
52 |
+
George Odum,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/sqpxhoycdpegkyjn6ooc.jpg,https://www.instagram.com/george.w.odum/?hl=en,
|
53 |
+
Sam Okuayinonu,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/fyolr2zk2nplfbdze75l.jpg,https://www.instagram.com/sam.ok97/?hl=en,
|
54 |
+
Terique Owens,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/okhin0uwdon2nimvbtwd.png,https://www.instagram.com/terique_owens/?hl=en,
|
55 |
+
Ricky Pearsall,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/to7q7w4kjiajseb4ljcx.png,https://www.instagram.com/ricky.pearsall/?hl=en,
|
56 |
+
Jason Pinnock,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/on29awacb9frijyggtgt,https://www.instagram.com/jpinny15/?hl=en,
|
57 |
+
Austen Pleasants,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wsbs5emdyzuc1sudbcls.png,https://www.instagram.com/oursf49ers/p/DDr48a4PdcO/?hl=en,
|
58 |
+
Mason Pline,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/mvjlaxpu8bu33ohspqot.png,https://www.instagram.com/mpline12/?hl=en,
|
59 |
+
Dominick Puni,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/tq1snozjpjrgrjoflrfg.png,https://www.instagram.com/dompuni/?hl=en,
|
60 |
+
Brock Purdy,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wt42ykvuxpngm4m1axxn.png,https://www.instagram.com/brock.purdy13/?hl=en,
|
61 |
+
Curtis Robinson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/x3xyzgeapcafr0gicl5y.jpg,https://www.instagram.com/curtis_robinsonn/?hl=en,
|
62 |
+
Demarcus Robinson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/lakf0xue1qqb7ed4p6ge,https://www.instagram.com/demarcusrobinson/?hl=en,
|
63 |
+
Patrick Taylor Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/hochjncae0hqcoveuexq.jpg,https://www.instagram.com/patricktaylor/?hl=en,
|
64 |
+
Trent Taylor,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/j8lom4fnsveujt8hykef.jpg,https://www.instagram.com/trent5taylor/?hl=en,
|
65 |
+
Tre Tomlinson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/n5pfv126xw0psc0d1ydz,https://www.instagram.com/trevius/?hl=en,
|
66 |
+
Jake Tonges,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/la3z5y6u7tix6rnq2m5l.jpg,https://www.instagram.com/jaketonges/?hl=en,
|
67 |
+
Fred Warner,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/zo4ftfar4bshrbipceuk.jpg,https://www.instagram.com/fred_warner/?hl=en,
|
68 |
+
Jon Weeks,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/d9fvm74pu4vyinveopbf,https://www.instagram.com/jonweeks46/?hl=en,
|
69 |
+
DaShaun White,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/mjnpmkw3ar6zcj2hxxzd,https://www.instagram.com/demoeto/?hl=en,
|
70 |
+
Trent Williams,https://static.clubs.nfl.com/image/private/t_thumb_squared_2x/f_auto/49ers/bnq8i5urjualxre5caqz.jpg,https://www.instagram.com/trentwilliams71/?hl=en,
|
71 |
+
Brayden Willis,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/xmo7hsuho3ehmsjwvthc.jpg,https://www.instagram.com/brayden_willis/?hl=en,
|
72 |
+
Dee Winters,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ggf13riajo0kn0y6kbu0.jpg,https://www.instagram.com/dwints_/?hl=en,
|
73 |
+
Mitch Wishnowsky,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mkf1xr1x8nr9l55oq72a.jpg,https://www.instagram.com/mitchwish3/?hl=en,
|
74 |
+
Nick Zakelj,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/o92tva22zezdz4aksadl.jpg,https://www.instagram.com/nickzakelj/?hl=en,
|
data/april_11_multimedia_data_collect/player_headshots.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import csv
|
4 |
+
|
5 |
+
# Roster page requested and parsed by scrape_49ers_roster().
ROSTER_URL = "https://www.49ers.com/team/players-roster/"
|
6 |
+
|
7 |
+
def scrape_49ers_roster(output_csv='niners_players_headshots.csv', timeout=30):
    """
    Scrape the 49ers roster page for player data and save it to a CSV file.

    Extracts, for every row of the roster table:
    - Name
    - Headshot Image URL

    Args:
        output_csv: Path of the CSV file to write. The file gets a header row
            followed by one ``name,headshot_url`` row per parsed player.
        timeout: Seconds to wait for the roster page before giving up.
            ``requests.get`` has no default timeout, so without one a stalled
            connection would block this script indefinitely.

    Raises:
        requests.HTTPError: If the roster page responds with an error status.
        requests.Timeout: If the page does not respond within ``timeout``.
        ValueError: If the page contains no roster table rows.
    """
    response = requests.get(ROSTER_URL, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    player_rows = soup.select('div.d3-o-table--horizontal-scroll tbody tr')
    if not player_rows:
        raise ValueError("No player rows found. The page structure may have changed.")

    roster_data = []
    for row in player_rows:
        # Best-effort parsing: a malformed row is reported and skipped so that
        # one bad row does not discard the rest of the roster.
        try:
            # The first cell of the row holds both the player name and headshot.
            player_cell = row.find('td')
            name_tag = player_cell.select_one('.nfl-o-roster__player-name')
            name = name_tag.get_text(strip=True) if name_tag else ""

            img_tag = player_cell.find('img')
            headshot_url = img_tag['src'] if img_tag and img_tag.get('src') else ""

            # Fix the URL by replacing the lazy-load transform with the
            # t_thumb_squared_2x transform.
            if headshot_url:
                headshot_url = headshot_url.replace('/t_thumb_squared/t_lazy/', '/t_thumb_squared_2x/')

            # NOTE: the remaining cells (jersey number, position, height,
            # weight, age, experience, college -- in table-column order per
            # the original author's notes) are intentionally not extracted.
            roster_data.append({
                'name': name,
                'headshot_url': headshot_url
            })

        except Exception as e:
            print(f"[WARNING] Skipping row due to error: {e}")
            continue

    # Save to CSV
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['name', 'headshot_url']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(roster_data)

    print(f"[INFO] Successfully saved {len(roster_data)} players to '{output_csv}'.")
|
71 |
+
|
72 |
+
# Script entry point: run the scraper with its default output path.
if __name__ == "__main__":
|
73 |
+
scrape_49ers_roster()
|
data/april_11_multimedia_data_collect/youtube_highlights.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/april_11_multimedia_data_collect/z_old/niners_players_headshots_with_socials.csv
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name,headshot_url,instagram_url
|
2 |
+
George Odum,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/sqpxhoycdpegkyjn6ooc.jpg,https://www.instagram.com/george.w.odum/?hl=en
|
3 |
+
Sam Okuayinonu,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/fyolr2zk2nplfbdze75l.jpg,https://www.instagram.com/sam.ok97/?hl=en
|
4 |
+
Terique Owens,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/okhin0uwdon2nimvbtwd.png,https://www.instagram.com/terique_owens/?hl=en
|
5 |
+
Ricky Pearsall,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/to7q7w4kjiajseb4ljcx.png,https://www.instagram.com/ricky.pearsall/?hl=en
|
6 |
+
Jason Pinnock,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/on29awacb9frijyggtgt,https://www.instagram.com/jpinny15/?hl=en
|
7 |
+
Austen Pleasants,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wsbs5emdyzuc1sudbcls.png,https://www.instagram.com/oursf49ers/p/DDr48a4PdcO/?hl=en
|
8 |
+
Mason Pline,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/mvjlaxpu8bu33ohspqot.png,https://www.instagram.com/mpline12/?hl=en
|
9 |
+
Dominick Puni,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/tq1snozjpjrgrjoflrfg.png,https://www.instagram.com/dompuni/?hl=en
|
10 |
+
Brock Purdy,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wt42ykvuxpngm4m1axxn.png,https://www.instagram.com/brock.purdy13/?hl=en
|
11 |
+
Curtis Robinson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/x3xyzgeapcafr0gicl5y.jpg,https://www.instagram.com/curtis_robinsonn/?hl=en
|
12 |
+
Demarcus Robinson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/lakf0xue1qqb7ed4p6ge,https://www.instagram.com/demarcusrobinson/?hl=en
|
13 |
+
Patrick Taylor Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/hochjncae0hqcoveuexq.jpg,https://www.instagram.com/patricktaylor/?hl=en
|
14 |
+
Trent Taylor,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/j8lom4fnsveujt8hykef.jpg,https://www.instagram.com/trent5taylor/?hl=en
|
15 |
+
Tre Tomlinson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/n5pfv126xw0psc0d1ydz,https://www.instagram.com/trevius/?hl=en
|
16 |
+
Jake Tonges,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/la3z5y6u7tix6rnq2m5l.jpg,https://www.instagram.com/jaketonges/?hl=en
|
17 |
+
Fred Warner,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/zo4ftfar4bshrbipceuk.jpg,https://www.instagram.com/fred_warner/?hl=en
|
18 |
+
Jon Weeks,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/d9fvm74pu4vyinveopbf,https://www.instagram.com/jonweeks46/?hl=en
|
19 |
+
DaShaun White,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/mjnpmkw3ar6zcj2hxxzd,https://www.instagram.com/demoeto/?hl=en
|
20 |
+
Trent Williams,https://static.clubs.nfl.com/image/private/t_thumb_squared_2x/f_auto/49ers/bnq8i5urjualxre5caqz.jpg,https://www.instagram.com/trentwilliams71/?hl=en
|
21 |
+
Brayden Willis,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/xmo7hsuho3ehmsjwvthc.jpg,https://www.instagram.com/brayden_willis/?hl=en
|
22 |
+
Dee Winters,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ggf13riajo0kn0y6kbu0.jpg,https://www.instagram.com/dwints_/?hl=en
|
23 |
+
Mitch Wishnowsky,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mkf1xr1x8nr9l55oq72a.jpg,https://www.instagram.com/mitchwish3/?hl=en
|
24 |
+
Nick Zakelj,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/o92tva22zezdz4aksadl.jpg,https://www.instagram.com/nickzakelj/?hl=en
|
data/april_11_multimedia_data_collect/z_old/niners_players_headshots_with_socials_v1_safe.csv
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name,headshot_url,instagram_url
|
2 |
+
Israel Abanikanda,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/wo7d9oli06eki4mnh3i8.png,https://www.instagram.com/izzygetsbusy__/?hl=en
|
3 |
+
Brandon Aiyuk,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/khwofxjjwx0hcaigzxhw.png,https://www.instagram.com/brandonaiyuk/?hl=en
|
4 |
+
Isaac Alarcon,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mlhuuxukyusodzlfsmnv.jpg,https://www.instagram.com/isaac_algar/?hl=en
|
5 |
+
Evan Anderson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/ng7oamywxvqgkx6l6kqc.png,https://www.instagram.com/klamps8/?hl=en
|
6 |
+
Tre Avery,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/a7kfv7xjftqlaqghk6sg,https://www.instagram.com/t.avery21/?hl=en
|
7 |
+
Alex Barrett,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/bm0ay22de39d1enrxwiq.jpg,https://www.instagram.com/alex.barrett/?hl=en
|
8 |
+
Ben Bartch,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/aqaslodzr7y0yvh5zzxa.jpg,https://www.instagram.com/bartchben/
|
9 |
+
Robert Beal Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/jwwhmt5d8mi0vdb8nfic.jpg,https://www.instagram.com/oursf49ers/reel/C_CVQxxp2ti/
|
10 |
+
Tatum Bethune,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/vl08pinqpmoubdf0zy5s.png,https://www.instagram.com/tatumx15/?hl=en
|
11 |
+
Nick Bosa,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/utiwswqvpkiwtocijwhz.jpg,https://www.instagram.com/nbsmallerbear/?hl=en
|
12 |
+
Jake Brendel,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/svsb41aekpzt3m9snilw.jpg,https://www.instagram.com/jake.brendel/?hl=en
|
13 |
+
Ji'Ayir Brown,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/urillpic02z774n09xvf.jpg,https://www.instagram.com/_tiig/?hl=en
|
14 |
+
Tre Brown,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/dpemqrrweakt8dci3qfb,https://www.instagram.com/tre_brown25/?hl=en
|
15 |
+
Spencer Burford,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/lje3ae25dntkdudp6eex.jpg,https://www.instagram.com/spence__74/?hl=en
|
16 |
+
Jacob Cowing,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/lg7aao0umc21oioufqdx.png,https://www.instagram.com/jaycowing_/?hl=en
|
17 |
+
Kalia Davis,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/rmnxj3sh7pyldmcxqe32.jpg,https://www.instagram.com/ucf.football/p/C3No6rTugDe/
|
18 |
+
Jordan Elliott,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/xbyky8r2yuzusd2tmrw8.jpg,https://www.instagram.com/jordanelliott_nbcs/
|
19 |
+
Luke Farrell,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/f2z7wpmx7ngtxcqqedla,https://www.instagram.com/lukefarrell89/?hl=en
|
20 |
+
Russell Gage Jr.,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/lkqhshv0dss1b9c6mdnj,https://www.instagram.com/w8k3mupruss/?hl=en
|
21 |
+
Jonathan Garvin,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/rapfcxut6vu50vcevswe.png,https://www.instagram.com/thesfniners/p/DCmgF8KSw2A/?hl=en
|
22 |
+
Luke Gifford,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/mhdbbzj8amttnpd1nbpn,https://www.instagram.com/luke_gifford/?hl=en
|
23 |
+
Kevin Givens,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/mstmgft0e0ancdzspboy.jpg,https://www.instagram.com/49ers/p/DAg_Pvpz1vV/
|
24 |
+
Jalen Graham,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/pbl2a1ujopvwqrfct0jp.jpg,https://www.instagram.com/thexniners/p/CruR8IPrSV7/
|
25 |
+
Richie Grant,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/szeswtvt6jmbu3so3phd,https://www.instagram.com/richiegrant_/?hl=en
|
26 |
+
Renardo Green,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/v79obx9v7tgcjjlo6hiy.png,https://www.instagram.com/dondada.8/?hl=en
|
27 |
+
Yetur Gross-Matos,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/etuaajmvhbc5qkebgoow.jpg,https://www.instagram.com/__lobo99/?hl=en
|
28 |
+
Isaac Guerendo,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/b66rpzr9iauo5rdprvka.png,https://www.instagram.com/isaac_guerendo/?hl=en
|
29 |
+
Sebastian Gutierrez,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/am9sywgkga6jq65hvboe.jpg,https://www.instagram.com/sebastiandev1/?hl=en
|
30 |
+
Matt Hennessy,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/zk8b21o8ncxnyu0gyf23,https://www.instagram.com/matt___hennessy/?hl=en
|
31 |
+
Isaiah Hodgins,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ax1oft9kqida0eokvtes.jpg,https://www.instagram.com/isaiahhodgins/?hl=en
|
32 |
+
Drake Jackson,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/y2luyplzpvbzokyfbmla.jpg,https://www.instagram.com/thefreak/?hl=en
|
33 |
+
Tarron Jackson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/pnqrjp76bgpkmacxma3r,https://www.instagram.com/tarron_jackson/?hl=en
|
34 |
+
Jauan Jennings,https://static.clubs.nfl.com/image/private/t_thumb_squared_2x/f_auto/49ers/wxsq7f4ajmhfs6tn4dg2.jpg,https://www.instagram.com/u_aintjj/?hl=en
|
35 |
+
Quindell Johnson,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/uga90lawcfxjcqna7opb,https://www.instagram.com/p/DFGnwNlymc9/
|
36 |
+
Zack Johnson,https://static.www.nfl.com/image/private/t_thumb_squared_2x/f_auto/league/n4hy8uzhcl5cl0ricwoa,https://www.instagram.com/zack.johnson.68/
|
37 |
+
Mac Jones,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/pedpdxybeus7mrovsoko,https://www.instagram.com/macjones_10/?hl=en
|
38 |
+
Kyle Juszczyk,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ywdz6y2pfzndqgmxxfbj.jpg,https://www.instagram.com/juicecheck44/?hl=en
|
39 |
+
George Kittle,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/elheepobwn1ahqwtfwat.jpg,https://www.instagram.com/gkittle/?hl=en
|
40 |
+
Deommodore Lenoir,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/f9fnuvbpcxku9ibt9qs8.jpg,https://www.instagram.com/deommo.lenoir/?hl=en
|
41 |
+
Chase Lucas,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/gjeejt5pbagnipodhdz4.jpg,https://www.instagram.com/chase_lucas24/?hl=en
|
42 |
+
Darrell Luter Jr.,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/g5rohvooet9g5w7rlhrh.jpg,https://www.instagram.com/_d.ray4k/?hl=en
|
43 |
+
Jaylen Mahoney,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/yv9inbia05nyxppuajv0.png,https://www.instagram.com/jaylenmahoney_/
|
44 |
+
Christian McCaffrey,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/a8fka6shomakkbllljgt.jpg,https://www.instagram.com/christianmccaffrey/?hl=en
|
45 |
+
Jalen McKenzie,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/gffxpns1ayxyjccymr6d.jpg,https://www.instagram.com/jay_peez70/?hl=en
|
46 |
+
Colton McKivitz,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/jugvoxjabgsbcfbuqfew.jpg,https://www.instagram.com/cmckivitz53/?hl=en
|
47 |
+
Jake Moody,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_auto/49ers/ygputwsbutemszr8xxkw.jpg,https://www.instagram.com/jmoods_/?hl=en
|
48 |
+
Tanner Mordecai,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/y8gipodnkeapgmegnxs1.png,https://www.instagram.com/t_mordecai/?hl=en
|
49 |
+
Malik Mustapha,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/eyrgxgpbrycd9x8glk0j.png,https://www.instagram.com/stapha/
|
50 |
+
Siran Neal,https://static.www.nfl.com/image/upload/t_thumb_squared_2x/f_auto/league/muhthfs6owkkpsyop1e6,https://www.instagram.com/siranneal/?hl=en
|
51 |
+
Drake Nugent,https://static.clubs.nfl.com/image/upload/t_thumb_squared_2x/f_png/49ers/qyb4kurtbv9uflmupfnc.png,https://www.instagram.com/drakenugent9/?hl=en
|
data/{create_embeddings.py → z_old/create_embeddings.py}
RENAMED
File without changes
|
data/{kml_cleanup.py → z_old/kml_cleanup.py}
RENAMED
File without changes
|
data/{kmz_file_explorer.ipynb → z_old/kmz_file_explorer.ipynb}
RENAMED
File without changes
|
data/{temp_unzipped → z_old/temp_unzipped}/doc.kml
RENAMED
File without changes
|
data/{upload_embeddings.py → z_old/upload_embeddings.py}
RENAMED
File without changes
|
data/{z_49ers_fan_chapters_DNU.csv → z_old/z_49ers_fan_chapters_DNU.csv}
RENAMED
File without changes
|
data/{z_fan_chapters_clean_DNU.csv → z_old/z_fan_chapters_clean_DNU.csv}
RENAMED
File without changes
|