import { InferenceClient } from "@huggingface/inference";
import { useState } from "react";

// One-shot (non-streaming) chat completion against the Hugging Face
// Inference API. Falls back to a token stored in localStorage when no
// apiKey is passed in.
export default async function inference({
  prompt,
  model = "Qwen/Qwen3-235B-A22B",
  apiKey,
  maxTokens = 512,
}: {
  prompt: string;
  model?: string;
  apiKey?: string;
  maxTokens?: number;
}) {
  if (!apiKey) {
    const token = window.localStorage.getItem("huggingface_access_token");
    if (!token) {
      throw new Error("You must be signed in to use the inference API!");
    }
    apiKey = token;
  }
  // Deliberately not logging apiKey, to avoid leaking credentials.
  console.log("Inference", prompt, model);
  const client = new InferenceClient(apiKey);
  const chatCompletion = await client.chatCompletion({
    provider: "fireworks-ai",
    model,
    messages: [
      {
        role: "user",
        content: prompt,
      },
    ],
    max_tokens: maxTokens,
  });
  console.log("Inference response", chatCompletion.choices[0].message);
  return chatCompletion.choices[0].message;
}

// Older non-streaming hook, kept for reference; prefer useInference below.
export function useInferenceOld({ apiKey }: { apiKey?: string }) {
  const [isLoading, setIsLoading] = useState(false);
  const [partialText, setPartialText] = useState("");
  const [inferenceResult, setInferenceResult] = useState("");
  const [error, setError] = useState<string | null>(null);

  const inferenceInternal = async ({
    prompt,
    model,
    maxTokens,
  }: {
    prompt: string;
    model: string;
    maxTokens: number;
  }) => {
    setIsLoading(true);
    setPartialText(""); // no streaming here, so there is never partial text
    try {
      const result = await inference({ prompt, model, apiKey, maxTokens });
      setInferenceResult(result.content);
      setIsLoading(false);
      return result.content;
    } catch (error) {
      console.error("Error in inference", error);
      setError(error instanceof Error ? error.message : String(error));
      setIsLoading(false);
      return null;
    }
  };

  const status = isLoading ? "thinking" : error ? "error" : "done";

  return {
    status,
    partialText,
    inferenceResult,
    error,
    inference: inferenceInternal,
  };
}

// Streaming hook: accumulates delta chunks into partialText as they
// arrive, then stores the full text in inferenceResult when the stream
// completes.
export function useInference({ apiKey }: { apiKey?: string }) {
  const [isLoading, setIsLoading] = useState(false);
  const [partialText, setPartialText] = useState("");
  const [inferenceResult, setInferenceResult] = useState("");
  const [error, setError] = useState<string | null>(null);

  const inferenceInternal = async ({
    prompt,
    model,
    maxTokens,
  }: {
    prompt: string;
    model: string;
    maxTokens: number;
  }) => {
    setIsLoading(true);
    setPartialText("");
    const client = new InferenceClient(apiKey);
    try {
      const stream = client.chatCompletionStream({
        provider: "nebius",
        model,
        max_tokens: maxTokens, // the API expects snake_case max_tokens
        messages: [
          {
            role: "user",
            content: prompt,
          },
        ],
      });
      let result = "";
      for await (const chunk of stream) {
        // delta.content can be undefined on some chunks (e.g. role-only
        // deltas), so guard to avoid appending the string "undefined".
        result += chunk.choices[0]?.delta?.content ?? "";
        setPartialText(result);
      }
      setIsLoading(false);
      setInferenceResult(result);
      return { status: "success", result };
    } catch (error) {
      console.error("Error in inference", error);
      const message = error instanceof Error ? error.message : String(error);
      setError(message);
      setIsLoading(false);
      return { status: "error", result: message };
    }
  };

  const status = isLoading ? "thinking" : error ? "error" : "done";

  return {
    status,
    partialText,
    inferenceResult,
    error,
    inference: inferenceInternal,
  };
}
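
// --- Usage sketch (not part of the original module) ---
// A minimal example of how useInference might be wired into a component,
// kept in a block comment because this module contains no JSX. The
// component name, prompt, and button wiring below are illustrative
// assumptions; the model name matches the default used above.
/*
import { useInference } from "./inference";

function PromptDemo({ apiKey }: { apiKey?: string }) {
  const { status, partialText, error, inference } = useInference({ apiKey });

  // Kick off a streaming completion; partialText re-renders as chunks arrive.
  const run = () =>
    inference({
      prompt: "Say hello in one sentence.",
      model: "Qwen/Qwen3-235B-A22B",
      maxTokens: 128,
    });

  return (
    <div>
      <button onClick={run} disabled={status === "thinking"}>
        Run
      </button>
      <pre>{status === "error" ? error : partialText}</pre>
    </div>
  );
}
*/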