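/**
 * GET handler that lists conversational models from the Hugging Face Hub API
 * (text-generation and image-text-to-text pipelines) and enriches each model
 * with its tokenizer config. Responses are served from a module-level cache
 * that tracks failed API calls and tokenizer fetches, so that on a partial
 * failure only the failed pieces are retried on the shorter refresh interval.
 */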
import type { Model, ModelWithTokenizer } from "$lib/types.js";
import { json } from "@sveltejs/kit";
import type { RequestHandler } from "./$types.js";

/**
 * Overall health of the cached payload. A SUCCESS cache is refreshed on the
 * long interval; PARTIAL and ERROR caches are retried on the shorter one.
 */
enum CacheStatus {
	SUCCESS = "success",
	PARTIAL = "partial",
	ERROR = "error",
}

type Cache = {
	data: ModelWithTokenizer[] | undefined;
	timestamp: number;
	status: CacheStatus;
	// Track failed models to selectively refetch them
	failedTokenizers: string[]; // Using array instead of Set for serialization compatibility
	failedApiCalls: {
		textGeneration: boolean;
		imageTextToText: boolean;
	};
};

const cache: Cache = {
	data: undefined,
	timestamp: 0,
	status: CacheStatus.ERROR,
	failedTokenizers: [],
	failedApiCalls: {
		textGeneration: false,
		imageTextToText: false,
	},
};

// Time between cache refreshes. A fully successful cache lives for the full
// interval; partial or errored caches are retried sooner.
const FULL_CACHE_REFRESH = 1000 * 60 * 60; // 1 hour
const PARTIAL_CACHE_REFRESH = 1000 * 60 * 15; // 15 minutes (shorter for partial results)
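
// Browser-like request headers, presumably so the Hub serves these requests
// the same way it would a regular page load.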
const headers: HeadersInit = {
	"Upgrade-Insecure-Requests": "1",
	"Sec-Fetch-Dest": "document",
	"Sec-Fetch-Mode": "navigate",
	"Sec-Fetch-Site": "none",
	"Sec-Fetch-User": "?1",
	"Priority": "u=0, i",
	"Pragma": "no-cache",
	"Cache-Control": "no-cache",
};

const requestInit: RequestInit = {
	credentials: "include",
	headers,
	method: "GET",
	mode: "cors",
};

interface ApiQueryParams {
	pipeline_tag?: "text-generation" | "image-text-to-text";
	filter: string;
	inference_provider: string;
	limit: number;
	expand: string[];
}

const queryParams: ApiQueryParams = {
	filter: "conversational",
	inference_provider: "all",
	limit: 100,
	expand: [
		"inferenceProviderMapping",
		"config",
		"library_name",
		"pipeline_tag",
		"tags",
		"mask_token",
		"trendingScore",
	],
};

const baseUrl = "https://huggingface.co/api/models";

function buildApiUrl(params: ApiQueryParams): string {
	const url = new URL(baseUrl);

	// Add simple params
	Object.entries(params).forEach(([key, value]) => {
		if (!Array.isArray(value)) {
			url.searchParams.append(key, String(value));
		}
	});

	// Handle array params specially
	params.expand.forEach(item => {
		url.searchParams.append("expand[]", item);
	});

	return url.toString();
}
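
// For illustration: buildApiUrl({ ...queryParams, pipeline_tag: "text-generation" })
// yields roughly (parameter order may vary):
// https://huggingface.co/api/models?filter=conversational&inference_provider=all&limit=100
//   &pipeline_tag=text-generation&expand[]=inferenceProviderMapping&expand[]=config&...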

export const GET: RequestHandler = async ({ fetch }) => {
	const timestamp = Date.now();

	// Determine if cache is valid
	const elapsed = timestamp - cache.timestamp;
	const cacheRefreshTime = cache.status === CacheStatus.SUCCESS ? FULL_CACHE_REFRESH : PARTIAL_CACHE_REFRESH;

	// Use cache if it's still valid and has data
	if (elapsed < cacheRefreshTime && cache.data?.length) {
		console.log(`Using ${cache.status} cache (${Math.floor(elapsed / 1000 / 60)} min old)`);
		return json(cache.data);
	}

	try {
		// Determine which API calls we need to make based on cache status
		const needTextGenFetch = elapsed >= FULL_CACHE_REFRESH || cache.failedApiCalls.textGeneration;
		const needImgTextFetch = elapsed >= FULL_CACHE_REFRESH || cache.failedApiCalls.imageTextToText;

		// Track the existing models we'll keep
		const existingModels = new Map<string, ModelWithTokenizer>();
		if (cache.data) {
			cache.data.forEach(model => {
				existingModels.set(model.id, model);
			});
		}

		// Initialize new tracking for failed requests
		const newFailedTokenizers: string[] = [];
		const newFailedApiCalls = {
			textGeneration: false,
			imageTextToText: false,
		};

		// Fetch models as needed
		let textGenModels: Model[] = [];
		let imgText2TextModels: Model[] = [];

		// Make the needed API calls in parallel
		const apiPromises: Promise<Response | void>[] = [];

		if (needTextGenFetch) {
			apiPromises.push(
				fetch(buildApiUrl({ ...queryParams, pipeline_tag: "text-generation" }), requestInit).then(async response => {
					if (!response.ok) {
						console.error(`Error fetching text-generation models`, response.status, response.statusText);
						newFailedApiCalls.textGeneration = true;
					} else {
						textGenModels = await response.json();
					}
				})
			);
		}

		if (needImgTextFetch) {
			apiPromises.push(
				fetch(buildApiUrl({ ...queryParams, pipeline_tag: "image-text-to-text" }), requestInit).then(async response => {
					if (!response.ok) {
						console.error(`Error fetching image-text-to-text models`, response.status, response.statusText);
						newFailedApiCalls.imageTextToText = true;
					} else {
						imgText2TextModels = await response.json();
					}
				})
			);
		}

		await Promise.all(apiPromises);

		// If both needed API calls failed and we have cached data, use it
		if (
			needTextGenFetch &&
			newFailedApiCalls.textGeneration &&
			needImgTextFetch &&
			newFailedApiCalls.imageTextToText &&
			cache.data?.length
		) {
			console.log("All API requests failed. Using existing cache as fallback.");
			cache.status = CacheStatus.ERROR;
			cache.timestamp = timestamp; // Update timestamp to avoid rapid retry loops
			cache.failedApiCalls = newFailedApiCalls;
			return json(cache.data);
		}

		// For API calls we didn't need to make, use cached models
		if (!needTextGenFetch && cache.data) {
			textGenModels = cache.data
				.filter(model => model.pipeline_tag === "text-generation")
				.map(model => model as Model);
		}
		if (!needImgTextFetch && cache.data) {
			imgText2TextModels = cache.data
				.filter(model => model.pipeline_tag === "image-text-to-text")
				.map(model => model as Model);
		}

		const allModels: Model[] = [...textGenModels, ...imgText2TextModels];
		const modelsNeedingTokenizer: Model[] = [];

		// First, use existing model data when possible
		allModels.forEach(model => {
			const existingModel = existingModels.get(model.id);

			// Only fetch tokenizer if:
			// 1. We don't have this model yet, OR
			// 2. It's in our failed tokenizers list AND we're doing a refresh, OR
			// 3. We're doing a full refresh
			if (
				!existingModel ||
				(cache.failedTokenizers.includes(model.id) && elapsed >= PARTIAL_CACHE_REFRESH) ||
				elapsed >= FULL_CACHE_REFRESH
			) {
				modelsNeedingTokenizer.push(model);
			}
		});

		console.log(`Total models: ${allModels.length}, Models needing tokenizer fetch: ${modelsNeedingTokenizer.length}`);

		// Prepare result - start with existing models we want to keep
		const models: ModelWithTokenizer[] = [];

		// Add models we're not re-fetching tokenizers for
		allModels.forEach(model => {
			const existingModel = existingModels.get(model.id);
			if (existingModel && !modelsNeedingTokenizer.some(m => m.id === model.id)) {
				models.push(existingModel);
			}
		});

		// Fetch tokenizer configs only for models that need it, with concurrency limit
		const batchSize = 10; // Limit concurrent requests
		for (let i = 0; i < modelsNeedingTokenizer.length; i += batchSize) {
			const batch = modelsNeedingTokenizer.slice(i, i + batchSize);
			const batchPromises = batch.map(async model => {
				try {
					const configUrl = `https://huggingface.co/${model.id}/raw/main/tokenizer_config.json`;
					// Same browser-like request options as the model listing calls
					const res = await fetch(configUrl, requestInit);
					if (!res.ok) {
						if (!newFailedTokenizers.includes(model.id)) {
							newFailedTokenizers.push(model.id);
						}
						return null;
					}
					const tokenizerConfig = await res.json();
					return { ...model, tokenizerConfig } satisfies ModelWithTokenizer;
				} catch (error) {
					console.error(`Error processing tokenizer for ${model.id}:`, error);
					if (!newFailedTokenizers.includes(model.id)) {
						newFailedTokenizers.push(model.id);
					}
					return null;
				}
			});

			const batchResults = await Promise.all(batchPromises);
			models.push(...batchResults.filter((model): model is ModelWithTokenizer => model !== null));
		}

		models.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()));

		// Determine cache status based on failures: the cache only counts as a full
		// success if no API call failed and at most 20% of tokenizer fetches failed
		const hasApiFailures = newFailedApiCalls.textGeneration || newFailedApiCalls.imageTextToText;
		const hasSignificantTokenizerFailures = newFailedTokenizers.length > modelsNeedingTokenizer.length * 0.2;
		const cacheStatus = hasApiFailures || hasSignificantTokenizerFailures ? CacheStatus.PARTIAL : CacheStatus.SUCCESS;

		cache.data = models;
		cache.timestamp = timestamp;
		cache.status = cacheStatus;
		cache.failedTokenizers = newFailedTokenizers;
		cache.failedApiCalls = newFailedApiCalls;

		console.log(
			`Cache updated: ${models.length} models, status: ${cacheStatus}, ` +
				`failed tokenizers: ${newFailedTokenizers.length}, ` +
				`API failures: text=${newFailedApiCalls.textGeneration}, img=${newFailedApiCalls.imageTextToText}`
		);

		return json(models);
	} catch (error) {
		console.error("Error fetching models:", error);

		// If we have cached data, use it as fallback. The timestamp is left
		// unchanged here, so the next request will retry the fetch.
		if (cache.data?.length) {
			cache.status = CacheStatus.ERROR;
			// Mark all API calls as failed so we retry them next time
			cache.failedApiCalls = {
				textGeneration: true,
				imageTextToText: true,
			};
			return json(cache.data);
		}

		// No cache available, return empty array
		cache.status = CacheStatus.ERROR;
		cache.timestamp = timestamp;
		cache.failedApiCalls = {
			textGeneration: true,
			imageTextToText: true,
		};
		return json([]);
	}
};
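
// Consumption sketch (hypothetical, not part of this module): a caller such as
// a SvelteKit load function could read the cached payload like so, assuming
// this route is mounted at /api/models:
//
//   export const load = async ({ fetch }) => {
//     const res = await fetch("/api/models");
//     const models: ModelWithTokenizer[] = await res.json();
//     return { models };
//   };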