import { PROVIDERS_MAP } from './providers';

/**
 * A model or dataset repository record as consumed by the helpers in this module.
 */
export interface ModelData {
  /** Repository identifier; the segment before the first `/` is treated as the owning author/org. */
  id: string;
  /** Display name (`fetchAllModelData` fills this with the repository id). */
  name: string;
  /** Creation timestamp string parseable by `Date`; the part before `T` is used as the calendar day. */
  createdAt: string;
  /** Number of likes. */
  likes: number;
  /** Download count, when available. */
  downloads?: number;
  /** `true` for datasets; `false` or absent for models. */
  isDataset?: boolean;
  /** Provider key this record is attributed to. */
  provider: string;
}

/**
 * Activity for a single calendar day.
 */
export interface Activity {
  /** Day key in `YYYY-MM-DD` form. */
  date: string;
  /** Number of items counted for that day. */
  count: number;
  /** Intensity bucket; the generators in this module emit 0 (no activity) through 4. */
  level: number;
}

/**
 * Daily activity series keyed by provider name.
 */
export interface CalendarData {
  [key: string]: Activity[];
}

/**
 * Item count for one provider, one month and one item type.
 */
export interface MonthlyActivity {
  /** Month key in `YYYY-MM` form. */
  date: string;
  /** Number of items in the bucket. */
  count: number;
  /** Provider name; `getTotalMonthlyData` uses the literal `'Total'` for cross-provider rows. */
  provider: string;
  /** `true` = datasets, `false` = models, `null` = both combined (emitted by `getTotalMonthlyData`). */
  isDataset: boolean | null;
  /** Optional name; `generateMonthlyData` stores the name of the item that opened the bucket. */
  name?: string;
}

/**
 * A repository record normalised by `processDetailedModelData`: every field is
 * required and month/sort keys are precomputed.
 */
export interface DetailedModelData {
  /** Repository identifier. */
  id: string;
  /** Display name. */
  name: string;
  /** Creation timestamp string, copied from the source record. */
  createdAt: string;
  /** Number of likes. */
  likes: number;
  /** Download count. */
  downloads: number;
  /** Creation month in `YYYY-MM` form. */
  monthKey: string;
  /** Provider name. */
  provider: string;
  /** Month key followed by `-` and the name. */
  sortKey: string;
  /** `true` for datasets, `false` for models. */
  isDataset: boolean;
}

/**
 * Builds a per-provider daily activity calendar that spans from the first day
 * of the month eleven months ago through today.
 *
 * Every key of `PROVIDERS_MAP` receives one `Activity` per day in the window,
 * including days with a zero count. An item is attributed to the first
 * provider whose `authors` list contains the segment of `item.id` before the
 * first `/`; items matching no provider are ignored.
 *
 * All day arithmetic is done in UTC so the generated keys line up with the
 * day portion of `createdAt` (the text before `T`).
 *
 * @param modelData Items to count. A non-array value is logged and treated as empty.
 * @returns Map of provider name to its daily activity series.
 */
export const generateCalendarData = (modelData: ModelData[]): CalendarData => {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  const providerEntries = Object.entries(PROVIDERS_MAP);
  const data: CalendarData = Object.fromEntries(
    providerEntries.map(([provider]): [string, Activity[]] => [provider, []])
  );

  // Work on a local binding instead of reassigning the caller-visible parameter.
  let items: ModelData[] = modelData;
  if (!Array.isArray(modelData)) {
    console.error('Model data is not an array:', modelData);
    items = [];
  }

  // provider -> day (YYYY-MM-DD) -> number of items created that day
  const countMap = new Map<string, Map<string, number>>();
  for (const item of items) {
    // Skip malformed records rather than throwing on `.split`.
    if (typeof item?.id !== 'string' || typeof item.createdAt !== 'string') continue;

    const [org] = item.id.split('/');
    const provider = providerEntries.find(([, info]) => info.authors.includes(org))?.[0];
    if (!provider) continue;

    const day = item.createdAt.split('T')[0];
    let perDay = countMap.get(provider);
    if (!perDay) {
      perDay = new Map<string, number>();
      countMap.set(provider, perDay);
    }
    perDay.set(day, (perDay.get(day) ?? 0) + 1);
  }

  // Build the window from explicit UTC components. Date.UTC normalises a
  // negative month into the previous year, and pinning the day to 1 up front
  // avoids the month roll-over that setMonth() causes on the 29th-31st.
  const now = new Date();
  const startMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth() - 11, 1);
  const endMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());

  // UTC has no DST, so stepping by a fixed 24h never skips or repeats a day.
  const days: string[] = [];
  for (let ms = startMs; ms <= endMs; ms += MS_PER_DAY) {
    days.push(new Date(ms).toISOString().slice(0, 10));
  }

  for (const [provider] of providerEntries) {
    const perDay = countMap.get(provider);
    data[provider] = days.map(date => {
      const count = perDay?.get(date) ?? 0;
      return {
        date,
        count,
        // Two items per level, capped at 4.
        level: count === 0 ? 0 : Math.min(4, Math.ceil(count / 2)),
      };
    });
  }

  return data;
};

/**
 * Sums the daily counts of every provider into a single activity series.
 *
 * Dates appear in the order they are first encountered while walking the
 * providers, so when all providers share the same date list the output
 * follows that list. Dates that only some providers contain are included
 * too rather than being dropped.
 *
 * @param calendarData Per-provider daily series.
 * @returns One `Activity` per distinct date; empty when there is no input data.
 */
export const aggregateCalendarData = (calendarData: CalendarData): Activity[] => {
  // Map keeps insertion order, which gives us the date ordering for free.
  const totals = new Map<string, number>();

  for (const providerData of Object.values(calendarData)) {
    for (const { date, count } of providerData) {
      totals.set(date, (totals.get(date) ?? 0) + count);
    }
  }

  return Array.from(totals, ([date, count]) => ({
    date,
    count,
    // Three items per level for the combined view, capped at 4.
    level: count === 0 ? 0 : Math.min(4, Math.ceil(count / 3)),
  }));
};

/**
 * Counts items per (month, provider, type) bucket, where type is model or dataset.
 *
 * The month is taken from `createdAt` in UTC, so the result does not depend on
 * the machine's time zone and agrees with the UTC day keys used elsewhere in
 * this module. Items whose `createdAt` cannot be parsed are skipped.
 *
 * @param modelData Items to bucket.
 * @returns One row per non-empty bucket, sorted by month key. Each row's
 *          `name` is the name of the item that opened the bucket.
 */
export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
  // month -> provider -> type -> bucket. Nested Maps keep first-seen order at
  // every level, which determines the order of rows within the same month.
  const buckets = new Map<string, Map<string, Map<string, MonthlyActivity>>>();

  for (const model of modelData) {
    const created = new Date(model.createdAt);
    // An unparseable timestamp would otherwise produce a "NaN-NaN" bucket.
    if (Number.isNaN(created.getTime())) continue;

    const monthKey = `${created.getUTCFullYear()}-${String(created.getUTCMonth() + 1).padStart(2, '0')}`;
    const provider = model.provider || 'unknown';
    const isDataset = model.isDataset ?? false;
    const type = isDataset ? 'dataset' : 'model';

    let byProvider = buckets.get(monthKey);
    if (!byProvider) {
      byProvider = new Map();
      buckets.set(monthKey, byProvider);
    }

    let byType = byProvider.get(provider);
    if (!byType) {
      byType = new Map();
      byProvider.set(provider, byType);
    }

    let bucket = byType.get(type);
    if (!bucket) {
      bucket = { date: monthKey, count: 0, provider, isDataset, name: model.name };
      byType.set(type, bucket);
    }

    bucket.count++;
  }

  const rows: MonthlyActivity[] = [];
  for (const byProvider of buckets.values()) {
    for (const byType of byProvider.values()) {
      rows.push(...byType.values());
    }
  }

  // Array.prototype.sort is stable, so rows of the same month keep bucket order.
  return rows.sort((a, b) => a.date.localeCompare(b.date));
};

/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of `ms`
 * milliseconds fires.
 */
const delay = (ms: number): Promise<void> =>
  new Promise<void>(resolve => {
    setTimeout(resolve, ms);
  });

/**
 * Fetches `url`, retrying when the request throws or the server answers 429.
 *
 * Any response other than 429 (including other error statuses) is returned
 * to the caller as-is.
 *
 * @param url Resource to fetch.
 * @param retries Maximum number of attempts.
 * @param delayMs Pause between attempts, in milliseconds.
 * @returns The first response whose status is not 429.
 * @throws The underlying fetch error if the final attempt throws, or
 *         `Error('Max retries reached')` if every attempt was rate-limited
 *         (or `retries` is less than 1).
 */
async function fetchWithRetry(url: string, retries = 3, delayMs = 1000): Promise<Response> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const isLastAttempt = attempt === retries;

    try {
      const response = await fetch(url);
      if (response.status !== 429) {
        return response;
      }

      // The rate-limited response is discarded; cancel its body so the
      // connection is released promptly instead of waiting for GC.
      void response.body?.cancel().catch(() => undefined);

      // Nothing left to retry: fail now instead of sleeping first.
      if (isLastAttempt) break;

      console.log(`Rate limit exceeded, waiting ${delayMs}ms before retry ${attempt}/${retries}`);
      await delay(delayMs);
    } catch (error) {
      if (isLastAttempt) throw error;
      console.log(`Fetch failed, retrying (${attempt}/${retries})...`);
      await delay(delayMs);
    }
  }

  throw new Error('Max retries reached');
}

/** Fields this module reads from one entry of a Hub listing response. */
interface HubListingItem {
  id: string;
  createdAt: string;
  likes: number;
  downloads?: number;
}

/**
 * Fetches one listing (`models` or `datasets`) for a single author and maps it
 * to `ModelData` records tagged with `provider`.
 *
 * @throws If the request ultimately fails, the status is not OK, or the body
 *         is not a JSON array.
 */
const fetchAuthorRepos = async (
  kind: 'models' | 'datasets',
  author: string,
  provider: string
): Promise<ModelData[]> => {
  const url = `https://huggingface.co/api/${kind}?author=${encodeURIComponent(author)}&sort=likes&direction=-1&limit=10000`;
  const response = await fetchWithRetry(url, 3, 2000);

  if (!response.ok) {
    throw new Error(`${kind} request for ${author} failed with HTTP ${response.status}`);
  }

  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    throw new Error(`${kind} response for ${author} is not an array`);
  }

  const isDataset = kind === 'datasets';
  // The element shape is asserted, not validated: only the four fields below are read.
  return (payload as HubListingItem[]).map(item => ({
    id: item.id,
    name: item.id,
    createdAt: item.createdAt,
    likes: item.likes,
    downloads: item.downloads,
    isDataset,
    provider,
  }));
};

/**
 * Downloads model and dataset listings for every author of every provider in
 * `PROVIDERS_MAP`.
 *
 * Requests run sequentially with a one-second pause after each author. The
 * model and dataset listings are fetched independently, so a failure of one
 * is logged and does not discard the other.
 *
 * @returns All fetched records, de-duplicated per item type and id (a model
 *          and a dataset sharing an id are both kept).
 */
export const fetchAllModelData = async (): Promise<ModelData[]> => {
  const allData: ModelData[] = [];

  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    console.log(`Fetching data for provider: ${provider}`);

    for (const author of info.authors) {
      console.log(` Fetching data for author: ${author}`);

      const fetched: Record<'models' | 'datasets', ModelData[]> = { models: [], datasets: [] };
      for (const kind of ['models', 'datasets'] as const) {
        try {
          fetched[kind] = await fetchAuthorRepos(kind, author, provider);
        } catch (error) {
          console.error(`Error fetching data for ${provider}/${author}:`, error);
        }
      }

      for (const item of fetched.models) allData.push(item);
      for (const item of fetched.datasets) allData.push(item);

      const fetchedTotal = fetched.models.length + fetched.datasets.length;
      console.log(` Fetched ${fetchedTotal} items (${fetched.models.length} models, ${fetched.datasets.length} datasets) for ${author}`);

      // Pause between authors to stay under the API rate limit.
      await delay(1000);
    }
  }

  // Models and datasets live in separate namespaces, so the item type is part
  // of the de-duplication key; otherwise a dataset would replace a model that
  // happens to share its id.
  const uniqueData = Array.from(
    new Map(
      allData.map((item): [string, ModelData] => [
        `${item.isDataset ? 'dataset' : 'model'}:${item.id}`,
        item,
      ])
    ).values()
  );

  console.log(`Total unique items fetched: ${uniqueData.length}`);
  return uniqueData;
};

/**
 * Normalises raw records into `DetailedModelData`: fills defaults for missing
 * fields and precomputes the month and sort keys.
 *
 * The month key is derived from `createdAt` in UTC so it does not depend on
 * the machine's time zone. The sort key is built from the same resolved name
 * that is returned in `name` (falling back to `id` when `name` is empty).
 *
 * @param models Raw records.
 * @returns One normalised record per input record, in the same order.
 */
export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
  return models.map(model => {
    const created = new Date(model.createdAt);
    const monthKey = `${created.getUTCFullYear()}-${String(created.getUTCMonth() + 1).padStart(2, '0')}`;
    // Resolve the name once so `name` and `sortKey` can never disagree.
    const name = model.name || model.id;

    return {
      id: model.id,
      name,
      createdAt: model.createdAt,
      likes: model.likes || 0,
      downloads: model.downloads || 0,
      monthKey,
      provider: model.provider || 'unknown',
      sortKey: `${monthKey}-${name}`,
      isDataset: model.isDataset ?? false,
    };
  });
}

/**
 * Collapses per-provider monthly rows into cross-provider totals.
 *
 * For every month present in the input, three rows with provider `'Total'`
 * are produced, in this order: all items (`isDataset: null`), models
 * (`isDataset: false`) and datasets (`isDataset: true`). A row whose
 * `isDataset` is not truthy is counted as a model.
 *
 * @param monthlyData Per-provider monthly rows.
 * @returns Total rows sorted by month key.
 */
export const getTotalMonthlyData = (monthlyData: MonthlyActivity[]): MonthlyActivity[] => {
  const totalByMonth = new Map<string, Record<'model' | 'dataset' | 'all', number>>();

  for (const { date, count, isDataset } of monthlyData) {
    let totals = totalByMonth.get(date);
    if (!totals) {
      totals = { model: 0, dataset: 0, all: 0 };
      totalByMonth.set(date, totals);
    }
    totals[isDataset ? 'dataset' : 'model'] += count;
    totals.all += count;
  }

  return Array.from(totalByMonth)
    .flatMap(([date, counts]): MonthlyActivity[] => [
      { date, count: counts.all, provider: 'Total', isDataset: null },
      { date, count: counts.model, provider: 'Total', isDataset: false },
      { date, count: counts.dataset, provider: 'Total', isDataset: true },
    ])
    // Stable sort: the three rows of a month keep their all/model/dataset order.
    .sort((a, b) => a.date.localeCompare(b.date));
};

/**
 * Renders monthly rows as CSV: one line per month, one column per provider.
 *
 * The header is `Date` followed by the providers in order of first
 * appearance; data lines are sorted by month key and missing cells are `0`.
 *
 * When several rows share the same month and provider, the cell is their sum
 * (e.g. a model row plus a dataset row). If one of those rows is a
 * precomputed total (`isDataset === null`), that total is used instead so it
 * is not double-counted. Fields containing a comma, quote or line break are
 * quoted, with embedded quotes doubled.
 *
 * @param data Monthly rows to render.
 * @returns CSV text with `\n` line separators and no trailing newline.
 */
export const convertToCSV = (data: MonthlyActivity[]): string => {
  const escapeField = (value: string): string =>
    /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;

  // date -> provider -> accumulated cell
  const cellsByDate = new Map<string, Map<string, { typedSum: number; total: number | null }>>();
  const providers = new Set<string>();

  for (const { date, provider, count, isDataset } of data) {
    providers.add(provider);

    let byProvider = cellsByDate.get(date);
    if (!byProvider) {
      byProvider = new Map();
      cellsByDate.set(date, byProvider);
    }

    let cell = byProvider.get(provider);
    if (!cell) {
      cell = { typedSum: 0, total: null };
      byProvider.set(provider, cell);
    }

    if (isDataset === null) {
      cell.total = count;
    } else {
      cell.typedSum += count;
    }
  }

  const providerList = [...providers];
  const lines = [['Date', ...providerList].map(escapeField).join(',')];

  const dates = [...cellsByDate.keys()].sort((a, b) => a.localeCompare(b));
  for (const date of dates) {
    const byProvider = cellsByDate.get(date);
    const counts = providerList.map(provider => {
      const cell = byProvider?.get(provider);
      return String(cell ? (cell.total ?? cell.typedSum) : 0);
    });
    lines.push([escapeField(date), ...counts].join(','));
  }

  return lines.join('\n');
};
