import { InferenceClient } from "@huggingface/inference";
import { useState } from "react";

// One-shot (non-streaming) chat completion helper. Falls back to the access
// token stored in localStorage when no apiKey is passed in.
export default async function inference({
  prompt,
  model = "Qwen/Qwen3-235B-A22B",
  apiKey,
  maxTokens = 512,
}: {
  prompt: string;
  model?: string;
  apiKey?: string;
  maxTokens?: number;
}) {
  if (!apiKey) {
    const token = window.localStorage.getItem("huggingface_access_token");
    if (!token) {
      throw new Error("You must be signed in to use the inference API!");
    }
    apiKey = token;
  }
  // Log the request, but never the API key itself.
  console.log("Inference", prompt, model);
  const client = new InferenceClient(apiKey);
  const chatCompletion = await client.chatCompletion({
    provider: "fireworks-ai",
    model,
    messages: [
      {
        role: "user",
        content: prompt,
      },
    ],
    max_tokens: maxTokens,
  });
  console.log("Inference response", chatCompletion.choices[0].message);
  return chatCompletion.choices[0].message;
}
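
// Example (sketch): calling `inference` directly from browser code. The prompt
// below is illustrative only; the default model and provider configured above
// are used, and the access token is read from localStorage.
//
//   const reply = await inference({
//     prompt: "Summarize the plot of Hamlet in two sentences.",
//     maxTokens: 256,
//   });
//   console.log(reply.content);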

// Older, non-streaming hook kept for reference. It resolves the full response
// in one go, so `partialText` never holds real streamed output.
export function useInferenceOld({ apiKey }: { apiKey?: string }) {
  const [isLoading, setIsLoading] = useState(false);
  const [partialText, setPartialText] = useState("");
  const [inferenceResult, setInferenceResult] = useState("");
  const [error, setError] = useState<string | null>(null);

  const inferenceInternal = async ({
    prompt,
    model,
    maxTokens,
  }: {
    prompt: string;
    model: string;
    maxTokens: number;
  }) => {
    setIsLoading(true);
    // Placeholder: this hook does not stream, so there is no real partial text.
    setPartialText("boop boop partial text");
    try {
      const result = await inference({
        prompt,
        model,
        apiKey,
        maxTokens,
      });
      setInferenceResult(result.content);
      setIsLoading(false);
      return result.content;
    } catch (err) {
      console.error("Error in inference", err);
      setError(err instanceof Error ? err.message : String(err));
      setIsLoading(false);
      return null;
    }
  };

  const status = isLoading ? "thinking" : error ? "error" : "done";
  return {
    status,
    partialText,
    inferenceResult,
    error,
    inference: inferenceInternal,
  };
}

// Streaming hook: exposes `partialText` as tokens arrive, plus the final
// `inferenceResult` once the stream completes.
export function useInference({ apiKey }: { apiKey?: string }) {
  const [isLoading, setIsLoading] = useState(false);
  const [partialText, setPartialText] = useState("");
  const [inferenceResult, setInferenceResult] = useState("");
  const [error, setError] = useState<string | null>(null);

  const inferenceInternal = async ({
    prompt,
    model,
    maxTokens,
  }: {
    prompt: string;
    model: string;
    maxTokens: number;
  }) => {
    setIsLoading(true);
    setPartialText("");
    const client = new InferenceClient(apiKey);
    try {
      const stream = client.chatCompletionStream({
        provider: "nebius",
        model,
        // Use the snake_case request parameter, matching the non-streaming call above.
        max_tokens: maxTokens,
        messages: [
          {
            role: "user",
            content: prompt,
          },
        ],
      });
      let result = "";
      // Accumulate streamed deltas and surface them as partial text.
      for await (const chunk of stream) {
        result += chunk.choices[0].delta.content ?? "";
        setPartialText(result);
      }
      setIsLoading(false);
      setInferenceResult(result);
      return { status: "success", result };
    } catch (err) {
      console.error("Error in inference", err);
      const message = err instanceof Error ? err.message : String(err);
      setError(message);
      setIsLoading(false);
      return { status: "error", result: message };
    }
  };

  const status = isLoading ? "thinking" : error ? "error" : "done";
  return {
    status,
    partialText,
    inferenceResult,
    error,
    inference: inferenceInternal,
  };
}
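
// Example (sketch): using the streaming hook from a React component. The
// component name and prompt below are illustrative, not part of this module.
//
//   function Demo({ apiKey }: { apiKey?: string }) {
//     const { status, partialText, inference } = useInference({ apiKey });
//     const run = () =>
//       inference({
//         prompt: "Write a haiku about rivers.",
//         model: "Qwen/Qwen3-235B-A22B",
//         maxTokens: 128,
//       });
//     return (
//       <div>
//         <button onClick={run} disabled={status === "thinking"}>Run</button>
//         <p>{partialText}</p>
//       </div>
//     );
//   }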