Thomas G. Lopes committed on
Commit ebb4888 · 1 Parent(s): 7991bc0

add global server cache
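
In brief: the previous dev-only in-memory cache (a bare `ModelWithTokenizer[]`) is replaced by a module-scoped cache that records its data, a timestamp, a status, and which upstream requests failed, so a fully successful refresh is kept for an hour, a partial or failed one is retried after fifteen minutes, and only the failed pieces are refetched. A minimal sketch of the freshness rule, distilled from the handler in the diff below (the helper name `isCacheFresh` and its parameters are illustrative, not part of the commit):

    // Illustrative distillation of the validity check in the GET handler below.
    // A fully successful refresh is trusted for an hour; a partial or failed
    // one is retried after fifteen minutes.
    const FULL_CACHE_REFRESH = 1000 * 60 * 60; // 1 hour
    const PARTIAL_CACHE_REFRESH = 1000 * 60 * 15; // 15 minutes

    function isCacheFresh(status: "success" | "partial" | "error", timestamp: number, hasData: boolean): boolean {
      const elapsed = Date.now() - timestamp;
      const ttl = status === "success" ? FULL_CACHE_REFRESH : PARTIAL_CACHE_REFRESH;
      return hasData && elapsed < ttl;
    }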

Files changed (1)
  1. src/routes/api/models/+server.ts +219 -41
src/routes/api/models/+server.ts CHANGED
@@ -1,9 +1,39 @@
 import type { Model, ModelWithTokenizer } from "$lib/types.js";
 import { json } from "@sveltejs/kit";
 import type { RequestHandler } from "./$types.js";
-import { dev } from "$app/environment";
 
-let cache: ModelWithTokenizer[] | undefined;
+enum CacheStatus {
+	SUCCESS = "success",
+	PARTIAL = "partial",
+	ERROR = "error",
+}
+
+type Cache = {
+	data: ModelWithTokenizer[] | undefined;
+	timestamp: number;
+	status: CacheStatus;
+	// Track failed models to selectively refetch them
+	failedTokenizers: string[]; // Using array instead of Set for serialization compatibility
+	failedApiCalls: {
+		textGeneration: boolean;
+		imageTextToText: boolean;
+	};
+};
+
+const cache: Cache = {
+	data: undefined,
+	timestamp: 0,
+	status: CacheStatus.ERROR,
+	failedTokenizers: [],
+	failedApiCalls: {
+		textGeneration: false,
+		imageTextToText: false,
+	},
+};
+
+// The time between cache refreshes
+const FULL_CACHE_REFRESH = 1000 * 60 * 60; // 1 hour
+const PARTIAL_CACHE_REFRESH = 1000 * 60 * 15; // 15 minutes (shorter for partial results)
 
 const headers: HeadersInit = {
 	"Upgrade-Insecure-Requests": "1",
@@ -42,80 +72,228 @@ const baseUrl = "https://huggingface.co/api/models";
 
 function buildApiUrl(params: ApiQueryParams): string {
 	const url = new URL(baseUrl);
+
 	// Add simple params
 	Object.entries(params).forEach(([key, value]) => {
 		if (!Array.isArray(value)) {
 			url.searchParams.append(key, String(value));
 		}
 	});
+
 	// Handle array params specially
 	params.expand.forEach(item => {
 		url.searchParams.append("expand[]", item);
 	});
+
 	return url.toString();
 }
 
 export const GET: RequestHandler = async ({ fetch }) => {
-	if (cache?.length && dev) {
-		console.log("Skipping load, using in memory cache");
-		return json(cache);
+	const timestamp = Date.now();
+
+	// Determine if cache is valid
+	const elapsed = timestamp - cache.timestamp;
+	const cacheRefreshTime = cache.status === CacheStatus.SUCCESS ? FULL_CACHE_REFRESH : PARTIAL_CACHE_REFRESH;
+
+	// Use cache if it's still valid and has data
+	if (elapsed < cacheRefreshTime && cache.data?.length) {
+		console.log(`Using ${cache.status} cache (${Math.floor(elapsed / 1000 / 60)} min old)`);
+		return json(cache.data);
 	}
 
 	try {
-		// Fetch both types of models in parallel
-		const textGenPromise = fetch(buildApiUrl({ ...queryParams, pipeline_tag: "text-generation" }), requestInit);
-		const imgText2TextPromise = fetch(buildApiUrl({ ...queryParams, pipeline_tag: "image-text-to-text" }), requestInit);
-		const [textGenResponse, imgText2TextResponse] = await Promise.all([textGenPromise, imgText2TextPromise]);
+		// Determine which API calls we need to make based on cache status
+		const needTextGenFetch = elapsed >= FULL_CACHE_REFRESH || cache.failedApiCalls.textGeneration;
+		const needImgTextFetch = elapsed >= FULL_CACHE_REFRESH || cache.failedApiCalls.imageTextToText;
 
-		if (!textGenResponse.ok) {
-			console.error(`Error fetching text-generation models`, textGenResponse.status, textGenResponse.statusText);
+		// Track the existing models we'll keep
+		const existingModels = new Map<string, ModelWithTokenizer>();
+		if (cache.data) {
+			cache.data.forEach(model => {
+				existingModels.set(model.id, model);
+			});
+		}
+
+		// Initialize new tracking for failed requests
+		const newFailedTokenizers: string[] = [];
+		const newFailedApiCalls = {
+			textGeneration: false,
+			imageTextToText: false,
+		};
+
+		// Fetch models as needed
+		let textGenModels: Model[] = [];
+		let imgText2TextModels: Model[] = [];
+
+		// Make the needed API calls in parallel
+		const apiPromises: Promise<Response | void>[] = [];
+		if (needTextGenFetch) {
+			apiPromises.push(
+				fetch(buildApiUrl({ ...queryParams, pipeline_tag: "text-generation" }), requestInit).then(async response => {
+					if (!response.ok) {
+						console.error(`Error fetching text-generation models`, response.status, response.statusText);
+						newFailedApiCalls.textGeneration = true;
+					} else {
+						textGenModels = await response.json();
+					}
+				})
+			);
 		}
 
-		if (!imgText2TextResponse.ok) {
-			console.error(
-				`Error fetching image-text-to-text models`,
-				imgText2TextResponse.status,
-				imgText2TextResponse.statusText
+		if (needImgTextFetch) {
+			apiPromises.push(
+				fetch(buildApiUrl({ ...queryParams, pipeline_tag: "image-text-to-text" }), requestInit).then(async response => {
+					if (!response.ok) {
+						console.error(`Error fetching image-text-to-text models`, response.status, response.statusText);
+						newFailedApiCalls.imageTextToText = true;
+					} else {
+						imgText2TextModels = await response.json();
+					}
+				})
 			);
 		}
 
-		// Parse the responses
-		const textGenModels: Model[] = textGenResponse.ok ? await textGenResponse.json() : [];
-		const imgText2TextModels: Model[] = imgText2TextResponse.ok ? await imgText2TextResponse.json() : [];
+		await Promise.all(apiPromises);
 
-		// Combine the models
-		const compatibleModels: Model[] = [...textGenModels, ...imgText2TextModels];
+		// If both needed API calls failed and we have cached data, use it
+		if (
+			needTextGenFetch &&
+			newFailedApiCalls.textGeneration &&
+			needImgTextFetch &&
+			newFailedApiCalls.imageTextToText &&
+			cache.data?.length
+		) {
+			console.log("All API requests failed. Using existing cache as fallback.");
+			cache.status = CacheStatus.ERROR;
+			cache.timestamp = timestamp; // Update timestamp to avoid rapid retry loops
+			cache.failedApiCalls = newFailedApiCalls;
+			return json(cache.data);
+		}
 
-		// Sort the models
-		compatibleModels.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()));
+		// For API calls we didn't need to make, use cached models
+		if (!needTextGenFetch && cache.data) {
+			textGenModels = cache.data.filter(model => model.pipeline_tag === "text-generation").map(model => model as Model);
+		}
 
-		// Fetch tokenizer configs for each model
-		const promises = compatibleModels.map(async model => {
-			const configUrl = `https://huggingface.co/${model.id}/raw/main/tokenizer_config.json`;
-			const res = await fetch(configUrl, {
-				credentials: "include",
-				headers,
-				method: "GET",
-				mode: "cors",
-			});
+		if (!needImgTextFetch && cache.data) {
+			imgText2TextModels = cache.data
+				.filter(model => model.pipeline_tag === "image-text-to-text")
+				.map(model => model as Model);
+		}
 
-			if (!res.ok) {
-				// console.error(`Error fetching tokenizer file for ${model.id}`, res.status, res.statusText);
-				return null; // Ignore failed requests by returning null
+		const allModels: Model[] = [...textGenModels, ...imgText2TextModels];
+
+		const modelsNeedingTokenizer: Model[] = [];
+
+		// First, use existing model data when possible
+		allModels.forEach(model => {
+			const existingModel = existingModels.get(model.id);
+
+			// Only fetch tokenizer if:
+			// 1. We don't have this model yet, OR
+			// 2. It's in our failed tokenizers list AND we're doing a refresh, OR
+			// 3. We're doing a full refresh
+			if (
+				!existingModel ||
+				(cache.failedTokenizers.includes(model.id) && elapsed >= PARTIAL_CACHE_REFRESH) ||
+				elapsed >= FULL_CACHE_REFRESH
+			) {
+				modelsNeedingTokenizer.push(model);
 			}
+		});
+
+		console.log(`Total models: ${allModels.length}, Models needing tokenizer fetch: ${modelsNeedingTokenizer.length}`);
+
+		// Prepare result - start with existing models we want to keep
+		const models: ModelWithTokenizer[] = [];
 
-			const tokenizerConfig = await res.json();
-			return { ...model, tokenizerConfig } satisfies ModelWithTokenizer;
+		// Add models we're not re-fetching tokenizers for
+		allModels.forEach(model => {
+			const existingModel = existingModels.get(model.id);
+			if (existingModel && !modelsNeedingTokenizer.some(m => m.id === model.id)) {
+				models.push(existingModel);
+			}
 		});
 
-		const models: ModelWithTokenizer[] = (await Promise.all(promises)).filter(
-			(model): model is ModelWithTokenizer => model !== null
+		// Fetch tokenizer configs only for models that need it, with concurrency limit
+		const batchSize = 10; // Limit concurrent requests
+
+		for (let i = 0; i < modelsNeedingTokenizer.length; i += batchSize) {
+			const batch = modelsNeedingTokenizer.slice(i, i + batchSize);
+			const batchPromises = batch.map(async model => {
+				try {
+					const configUrl = `https://huggingface.co/${model.id}/raw/main/tokenizer_config.json`;
+					const res = await fetch(configUrl, {
+						credentials: "include",
+						headers,
+						method: "GET",
+						mode: "cors",
+					});
+
+					if (!res.ok) {
+						if (!newFailedTokenizers.includes(model.id)) {
+							newFailedTokenizers.push(model.id);
+						}
+						return null;
+					}
+
+					const tokenizerConfig = await res.json();
+					return { ...model, tokenizerConfig } satisfies ModelWithTokenizer;
+				} catch (error) {
+					console.error(`Error processing tokenizer for ${model.id}:`, error);
+					if (!newFailedTokenizers.includes(model.id)) {
+						newFailedTokenizers.push(model.id);
+					}
+					return null;
+				}
+			});
+
+			const batchResults = await Promise.all(batchPromises);
+			models.push(...batchResults.filter((model): model is ModelWithTokenizer => model !== null));
+		}
+
+		models.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()));
+
+		// Determine cache status based on failures
+		const hasApiFailures = newFailedApiCalls.textGeneration || newFailedApiCalls.imageTextToText;
+		const hasSignificantTokenizerFailures = newFailedTokenizers.length > modelsNeedingTokenizer.length * 0.2;
+
+		const cacheStatus = hasApiFailures || hasSignificantTokenizerFailures ? CacheStatus.PARTIAL : CacheStatus.SUCCESS;
+
+		cache.data = models;
+		cache.timestamp = timestamp;
+		cache.status = cacheStatus;
+		cache.failedTokenizers = newFailedTokenizers;
+		cache.failedApiCalls = newFailedApiCalls;
+
+		console.log(
+			`Cache updated: ${models.length} models, status: ${cacheStatus}, ` +
+				`failed tokenizers: ${newFailedTokenizers.length}, ` +
+				`API failures: text=${newFailedApiCalls.textGeneration}, img=${newFailedApiCalls.imageTextToText}`
 		);
-		cache = models;
 
-		return json(cache);
+		return json(models);
 	} catch (error) {
 		console.error("Error fetching models:", error);
+
+		// If we have cached data, use it as fallback
+		if (cache.data?.length) {
+			cache.status = CacheStatus.ERROR;
+			// Mark all API calls as failed so we retry them next time
+			cache.failedApiCalls = {
+				textGeneration: true,
+				imageTextToText: true,
+			};
+			return json(cache.data);
+		}
+
+		// No cache available, return empty array
+		cache.status = CacheStatus.ERROR;
+		cache.timestamp = timestamp;
+		cache.failedApiCalls = {
+			textGeneration: true,
+			imageTextToText: true,
+		};
 		return json([]);
 	}
 };