// Xianbao QIAN
// fix a few minor issues
// e2e991a
import { PROVIDERS_MAP } from './providers';
/**
 * One Hugging Face Hub repository (model or dataset) as collected by
 * `fetchAllModelData`.
 */
export interface ModelData {
// Repository id from the API response; the part before the first '/' is
// matched against provider authors when building the calendar.
id: string;
// Display name; `fetchAllModelData` fills it with the same value as `id`.
name: string;
// Creation timestamp string. Consumers split it on 'T' or pass it to
// `new Date(...)`, so presumably ISO 8601 - confirm against the API.
createdAt: string;
likes: number;
downloads?: number;
// true for entries fetched from the datasets endpoint, false for models.
isDataset?: boolean;
// PROVIDERS_MAP key under which the item was fetched.
provider: string;
}
/** A single day of a heatmap calendar. */
export interface Activity {
// Day label produced from `toISOString().split('T')[0]`, i.e. YYYY-MM-DD.
date: string;
// Number of items counted for that day.
count: number;
// Heatmap intensity bucket: 0 when count is 0, otherwise at most 4.
level: number;
}
/** Daily activity series keyed by provider (the keys of PROVIDERS_MAP). */
export interface CalendarData {
[key: string]: Activity[];
}
/** Number of items created in one month for one provider and item type. */
export interface MonthlyActivity {
date: string; // YYYY-MM format
count: number;
// Provider key, or 'Total' for rows produced by `getTotalMonthlyData`.
provider: string;
isDataset: boolean | null; // null means it includes both
// Name of the first item that opened this bucket in `generateMonthlyData`;
// not set on 'Total' rows.
name?: string;
}
/**
 * `ModelData` normalised by `processDetailedModelData`: optional fields are
 * filled with defaults and month/sort keys are precomputed.
 */
export interface DetailedModelData {
id: string;
name: string;
createdAt: string;
likes: number;
downloads: number;
monthKey: string; // YYYY-MM
provider: string;
sortKey: string; // "<monthKey>-<name>", i.e. YYYY-MM followed by the item name
isDataset: boolean;
}
/**
 * Builds a per-provider daily activity calendar from the fetched items.
 *
 * The calendar spans from the first day of the month eleven months before the
 * current month up to today. All date arithmetic is done in UTC so that the
 * generated day labels line up with the UTC date taken from `createdAt`.
 *
 * @param modelData Items to count. A non-array value is logged and treated as
 *   an empty list.
 * @returns One entry per PROVIDERS_MAP key, each holding one `Activity` per
 *   day (zero-filled), in chronological order.
 */
export const generateCalendarData = (modelData: ModelData[]): CalendarData => {
  if (!Array.isArray(modelData)) {
    console.error('Model data is not an array:', modelData);
  }
  const items: ModelData[] = Array.isArray(modelData) ? modelData : [];

  // Resolve author -> provider once. The first provider listing an author
  // wins, matching a first-match scan over PROVIDERS_MAP.
  const providerByAuthor = new Map<string, string>();
  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    for (const author of info.authors) {
      if (!providerByAuthor.has(author)) {
        providerByAuthor.set(author, provider);
      }
    }
  }

  // provider -> day (YYYY-MM-DD) -> number of items created that day
  const counts = new Map<string, Map<string, number>>();
  for (const item of items) {
    const [org] = item.id.split('/');
    const provider = providerByAuthor.get(org);
    if (!provider) continue;

    const day = item.createdAt.split('T')[0];
    let perDay = counts.get(provider);
    if (!perDay) {
      perDay = new Map<string, number>();
      counts.set(provider, perDay);
    }
    perDay.set(day, (perDay.get(day) ?? 0) + 1);
  }

  // Build the list of days once. Date.UTC normalises a negative month index
  // into the previous year, and pinning the day to 1 up front avoids the
  // month roll-over that setMonth() causes on the 29th-31st.
  const today = new Date();
  const cursor = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth() - 11, 1));
  const days: string[] = [];
  while (cursor.getTime() <= today.getTime()) {
    days.push(cursor.toISOString().split('T')[0]);
    cursor.setUTCDate(cursor.getUTCDate() + 1);
  }

  // Fill every provider with actual counts and zero counts.
  const data: CalendarData = {};
  for (const provider of Object.keys(PROVIDERS_MAP)) {
    const perDay = counts.get(provider);
    data[provider] = days.map(date => {
      const count = perDay?.get(date) ?? 0;
      return {
        date,
        count,
        level: count === 0 ? 0 : Math.min(4, Math.ceil(count / 2))
      };
    });
  }
  return data;
};
/**
 * Merges the calendars of all providers into a single heatmap series.
 *
 * The dates of the result are taken from the first provider's series; for
 * each of them the counts of every provider are summed.
 *
 * @param calendarData Per-provider daily activity.
 * @returns One `Activity` per date of the first provider, or an empty array
 *   when there are no providers.
 */
export const aggregateCalendarData = (calendarData: CalendarData): Activity[] => {
  const allSeries = Object.values(calendarData);
  if (allSeries.length === 0) {
    return [];
  }

  // Running total per date across every provider.
  const totals: Record<string, number> = {};
  for (const { date, count } of allSeries.flat()) {
    totals[date] = (totals[date] || 0) + count;
  }

  // The first provider's series defines which dates are reported, and in what order.
  return allSeries[0].map(({ date }) => {
    const count = totals[date] || 0;
    const level = count === 0 ? 0 : Math.min(4, Math.ceil(count / 3));
    return { date, count, level };
  });
};
/**
 * Counts items per month, provider and type (model / dataset).
 *
 * Months are derived from `createdAt` in UTC, so the result does not depend
 * on the timezone of the machine running the code. Items whose `createdAt`
 * cannot be parsed are skipped with a warning instead of being collected in
 * a "NaN-NaN" bucket.
 *
 * @param modelData Items to aggregate.
 * @returns One `MonthlyActivity` per (month, provider, type) combination,
 *   sorted by month. `name` carries the name of the first item seen for
 *   that combination.
 */
export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
  // month (YYYY-MM) -> provider -> 'model' | 'dataset' -> bucket
  const monthlyData: Record<string, Record<string, Record<string, MonthlyActivity>>> = {};

  for (const model of modelData) {
    const created = new Date(model.createdAt);
    if (Number.isNaN(created.getTime())) {
      console.warn(`Skipping item with invalid createdAt: ${model.id}`);
      continue;
    }

    const monthKey = `${created.getUTCFullYear()}-${String(created.getUTCMonth() + 1).padStart(2, '0')}`;
    const provider = model.provider || 'unknown';
    const isDataset = model.isDataset ?? false;
    const type = isDataset ? 'dataset' : 'model';

    const byProvider = monthlyData[monthKey] ?? (monthlyData[monthKey] = {});
    const byType = byProvider[provider] ?? (byProvider[provider] = {});
    const bucket =
      byType[type] ??
      (byType[type] = {
        date: monthKey,
        count: 0,
        provider,
        isDataset,
        name: model.name
      });
    bucket.count++;
  }

  // Flatten the nested structure; the sort is stable, so buckets of the same
  // month keep their provider/type insertion order.
  return Object.values(monthlyData)
    .flatMap(byProvider => Object.values(byProvider).flatMap(byType => Object.values(byType)))
    .sort((a, b) => a.date.localeCompare(b.date));
};
/** Returns a promise that settles once `ms` milliseconds have elapsed. */
const delay = (ms: number) =>
  new Promise(wake => {
    setTimeout(wake, ms);
  });
/**
 * Fetches `url`, retrying on network errors and HTTP 429 responses.
 *
 * @param url Address to request.
 * @param retries Maximum number of attempts.
 * @param delayMs Pause between two attempts, in milliseconds.
 * @returns The first response whose status is not 429 (it may still be a
 *   non-2xx response; callers decide how to treat it).
 * @throws The last network error when the final attempt fails, or
 *   `Error('Max retries reached')` when every attempt was rate limited (or
 *   `retries` is not positive).
 */
async function fetchWithRetry(url: string, retries = 3, delayMs = 1000): Promise<Response> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const isLastAttempt = attempt === retries;
    try {
      const response = await fetch(url);
      if (response.status !== 429) {
        return response;
      }
      // Rate limit exceeded. Nothing follows the last attempt, so do not
      // announce a retry or sleep before giving up.
      if (isLastAttempt) break;
      console.log(`Rate limit exceeded, waiting ${delayMs}ms before retry ${attempt}/${retries}`);
    } catch (error) {
      if (isLastAttempt) throw error;
      console.log(`Fetch failed, retrying (${attempt}/${retries})...`);
    }
    await delay(delayMs);
  }
  throw new Error('Max retries reached');
}
/**
 * Lists one author's models or datasets from the Hugging Face API and maps
 * them to `ModelData`.
 *
 * @throws When the request ultimately fails, the response is not 2xx, or the
 *   body is not a JSON array.
 */
async function fetchAuthorRepos(
  kind: 'models' | 'datasets',
  author: string,
  provider: string
): Promise<ModelData[]> {
  const response = await fetchWithRetry(
    `https://huggingface.co/api/${kind}?author=${encodeURIComponent(author)}&sort=likes&direction=-1&limit=10000`,
    3,
    2000
  );
  if (!response.ok) {
    throw new Error(`Unexpected HTTP ${response.status} while listing ${kind} for ${author}`);
  }

  // Error payloads are objects, not arrays; reject them with a clear message
  // instead of failing later on `.map`.
  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    throw new Error(`Unexpected response shape while listing ${kind} for ${author}`);
  }

  const isDataset = kind === 'datasets';
  return payload.map(
    (item: { id: string; createdAt: string; likes: number; downloads?: number }) => ({
      id: item.id,
      name: item.id,
      createdAt: item.createdAt,
      likes: item.likes,
      downloads: item.downloads,
      isDataset,
      provider
    })
  );
}

/**
 * Fetches every model and dataset of every author listed in PROVIDERS_MAP.
 *
 * Requests are issued sequentially with a pause between authors to stay
 * below the API rate limit. Models and datasets are fetched independently,
 * so a failure of one listing is logged and does not discard the other.
 *
 * @returns All fetched items, de-duplicated per (type, id). Models and
 *   datasets live in separate namespaces, so a model and a dataset sharing
 *   the same id are both kept. When an id is fetched more than once for the
 *   same type, the last fetched entry wins.
 */
export const fetchAllModelData = async (): Promise<ModelData[]> => {
  const allData: ModelData[] = [];

  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    console.log(`Fetching data for provider: ${provider}`);
    for (const author of info.authors) {
      console.log(` Fetching data for author: ${author}`);

      const fetched = { models: 0, datasets: 0 };
      for (const kind of ['models', 'datasets'] as const) {
        try {
          const items = await fetchAuthorRepos(kind, author, provider);
          fetched[kind] = items.length;
          // Push one by one: spreading a very large array into push() can
          // exceed the engine's argument limit.
          for (const item of items) {
            allData.push(item);
          }
        } catch (error) {
          console.error(`Error fetching ${kind} for ${provider}/${author}:`, error);
        }
      }
      console.log(
        ` Fetched ${fetched.models + fetched.datasets} items (${fetched.models} models, ${fetched.datasets} datasets) for ${author}`
      );

      // Add a delay between author requests to avoid rate limiting
      await delay(1000);
    }
  }

  // Remove duplicates; the type is part of the key so that a dataset does
  // not overwrite a model with the same id.
  const unique = new Map<string, ModelData>();
  for (const item of allData) {
    unique.set(`${item.isDataset ? 'dataset' : 'model'}:${item.id}`, item);
  }
  const uniqueData = Array.from(unique.values());
  console.log(`Total unique items fetched: ${uniqueData.length}`);
  return uniqueData;
};
/**
 * Normalises raw items for detailed listings: fills defaults for optional or
 * missing fields and precomputes month and sort keys.
 *
 * The month is derived from `createdAt` in UTC, so the result does not depend
 * on the local timezone. An unparseable `createdAt` yields the month key
 * "NaN-NaN".
 *
 * @param models Items to normalise.
 * @returns One `DetailedModelData` per input item, in input order.
 */
export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
  return models.map(model => {
    const created = new Date(model.createdAt);
    const monthKey = `${created.getUTCFullYear()}-${String(created.getUTCMonth() + 1).padStart(2, '0')}`;
    // Resolve the name once so that `name` and `sortKey` agree; a missing
    // name must not produce a "...-undefined" sort key.
    const name = model.name || model.id;

    return {
      id: model.id,
      name,
      createdAt: model.createdAt,
      likes: model.likes || 0,
      downloads: model.downloads || 0, // Set downloads to 0 if it's undefined
      monthKey,
      provider: model.provider || 'unknown',
      sortKey: `${monthKey}-${name}`,
      isDataset: model.isDataset ?? false
    };
  });
}
/**
 * Sums monthly activity over all providers.
 *
 * @param monthlyData Per-provider monthly buckets.
 * @returns Three rows per month, all with provider 'Total': the combined
 *   count (`isDataset: null`), the model count (`isDataset: false`) and the
 *   dataset count (`isDataset: true`), in that order. Months are sorted
 *   ascending.
 */
export const getTotalMonthlyData = (monthlyData: MonthlyActivity[]): MonthlyActivity[] => {
  type Tally = Record<'model' | 'dataset' | 'all', number>;

  // Accumulate the per-type and combined counts for each month in one sweep.
  const tallies = monthlyData.reduce<Record<string, Tally>>((acc, { date, count, isDataset }) => {
    if (!acc[date]) {
      acc[date] = { model: 0, dataset: 0, all: 0 };
    }
    if (isDataset) {
      acc[date].dataset += count;
    } else {
      acc[date].model += count;
    }
    acc[date].all += count;
    return acc;
  }, {});

  // Emit the three 'Total' rows month by month, in ascending month order.
  const months = Object.keys(tallies).sort((a, b) => a.localeCompare(b));
  const totals: MonthlyActivity[] = [];
  for (const date of months) {
    const { all, model, dataset } = tallies[date];
    totals.push(
      { date, count: all, provider: 'Total', isDataset: null },
      { date, count: model, provider: 'Total', isDataset: false },
      { date, count: dataset, provider: 'Total', isDataset: true }
    );
  }
  return totals;
};
/**
 * Converts monthly activity data to CSV: one row per month, one column per
 * provider (in order of first appearance), preceded by a header row.
 *
 * When several entries exist for the same month and provider, they are
 * combined instead of overwriting each other: an entry with
 * `isDataset === null` already represents models plus datasets and is used
 * as is; otherwise the model and dataset counts are added up. Missing
 * combinations are written as 0.
 *
 * Fields containing a comma, double quote or line break are quoted, with
 * embedded double quotes doubled.
 *
 * @param data Monthly activity rows.
 * @returns CSV text with '\n' line separators and no trailing newline.
 */
export const convertToCSV = (data: MonthlyActivity[]): string => {
  const providers = new Set<string>();
  // date -> provider -> counts seen so far
  const cells = new Map<string, Map<string, { combined: number | null; split: number }>>();

  for (const { date, provider, count, isDataset } of data) {
    providers.add(provider);

    let row = cells.get(date);
    if (!row) {
      row = new Map();
      cells.set(date, row);
    }
    let cell = row.get(provider);
    if (!cell) {
      cell = { combined: null, split: 0 };
      row.set(provider, cell);
    }

    if (isDataset === null) {
      cell.combined = (cell.combined ?? 0) + count;
    } else {
      cell.split += count;
    }
  }

  const escapeField = (value: string): string =>
    /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;

  const providerColumns = Array.from(providers);
  const header = ['Date', ...providerColumns];

  const rows = Array.from(cells.keys())
    .sort((a, b) => a.localeCompare(b))
    .map(date => {
      const row = cells.get(date);
      const values = providerColumns.map(provider => {
        const cell = row?.get(provider);
        // Prefer the combined figure so it is not double counted with its parts.
        const total = cell ? cell.combined ?? cell.split : 0;
        return (total || 0).toString();
      });
      return [date, ...values];
    });

  return [header, ...rows]
    .map(fields => fields.map(escapeField).join(','))
    .join('\n');
};