MiniSearch / server /rerankerService.ts
github-actions[bot]
Sync from https://github.com/felladrin/MiniSearch
e538a38
import { type ChildProcess, spawn } from "node:child_process";
import path from "node:path";
import { fileURLToPath } from "node:url";
import debug from "debug";
import { downloadFileFromHuggingFaceRepository } from "./downloadFileFromHuggingFaceRepository";
// Debug logger namespaced after this module's file name
// (the last path segment of `import.meta.url`).
const fileName = path.basename(import.meta.url);
const printMessage = debug(fileName);
// Enable this module's debug namespace programmatically.
debug.enable(fileName);
// Host and port passed to llama-server via `--host`/`--port`; the same
// address is used for the `/health` and `/v1/rerank` requests in this module.
const SERVER_HOST = "127.0.0.1";
const SERVER_PORT = 8012;
// When true, the child's stdout/stderr are piped instead of ignored and
// `--log-verbosity` is set to "1" instead of "0".
const VERBOSE_MODE = false;
// Hugging Face repository and file name of the reranker model; used both for
// the download call and for building the local model path.
const MODEL_HF_REPO = "Felladrin/gguf-jina-reranker-v1-tiny-en";
const MODEL_HF_FILE = "jina-reranker-v1-tiny-en-Q8_0.gguf";
// Set to true by startRerankerService once `/health` reports status "ok";
// getRerankerStatus and rerank check it before contacting the server.
let isReady = false;
/**
 * Builds the absolute path of the reranker model file.
 *
 * The path is `models/<MODEL_HF_REPO>/<MODEL_HF_FILE>` resolved against the
 * directory that contains this module.
 *
 * @returns Absolute filesystem path of the model file.
 */
export function getRerankerModelPath() {
  const moduleDirectory = path.dirname(fileURLToPath(import.meta.url));
  const relativeSegments = ["models", MODEL_HF_REPO, MODEL_HF_FILE];
  return path.resolve(moduleDirectory, ...relativeSegments);
}
/**
 * Hands the configured Hugging Face repository and file name, together with
 * the target path, to `downloadFileFromHuggingFaceRepository` and waits for
 * it to finish.
 *
 * NOTE(review): presumably the helper skips the download when the file is
 * already present — confirm against its implementation.
 *
 * @param modelPath Destination path passed to the download helper.
 */
async function ensureModelExists(modelPath: string) {
  const pendingDownload = downloadFileFromHuggingFaceRepository(
    MODEL_HF_REPO,
    MODEL_HF_FILE,
    modelPath,
  );
  await pendingDownload;
}
/**
 * Prepares the reranker model and launches a local `llama-server` in
 * reranking mode.
 *
 * Steps:
 * 1. Ensure the model file exists at the path from `getRerankerModelPath()`.
 * 2. Spawn `llama-server` bound to `SERVER_HOST:SERVER_PORT`.
 * 3. Poll `/health` every 100 ms until it reports `status: "ok"`, then mark
 *    the service as ready.
 *
 * The readiness flag is cleared again when the spawned process exits.
 *
 * @returns The spawned server process, once the service reports healthy.
 * @throws If the process cannot be spawned or exits before becoming healthy
 *   (previously this case polled forever or raised an unhandled `'error'`
 *   event).
 */
export async function startRerankerService() {
  printMessage("Preparing model...");
  const modelPath = getRerankerModelPath();
  await ensureModelExists(modelPath);

  printMessage("Starting service...");
  const contextSize = 2048;
  const serverProcess = spawn(
    "llama-server",
    [
      "--model",
      modelPath,
      "--ctx-size",
      contextSize.toString(),
      "--batch-size",
      contextSize.toString(),
      "--ubatch-size",
      contextSize.toString(),
      "--flash-attn",
      "--host",
      SERVER_HOST,
      "--port",
      SERVER_PORT.toString(),
      "--log-verbosity",
      VERBOSE_MODE ? "1" : "0",
      "--threads",
      "1",
      "--parallel",
      "1",
      "--no-warmup",
      "--reranking",
      "--pooling",
      "rank",
    ],
    {
      stdio: [
        "ignore",
        VERBOSE_MODE ? "pipe" : "ignore",
        VERBOSE_MODE ? "pipe" : "ignore",
      ],
    },
  );

  // Both streams are only present in verbose mode. stdout must be consumed
  // as well: an unread pipe can fill up and stall the child process.
  const logChunk = (chunk: { toString(): string }) => {
    printMessage(chunk.toString());
  };
  serverProcess.stdout?.on("data", logChunk);
  serverProcess.stderr?.on("data", logChunk);

  await new Promise<void>((resolve, reject) => {
    let settled = false;
    let retryTimer: ReturnType<typeof setTimeout> | undefined;

    // Rejects the startup promise once; later calls are no-ops.
    const abortStartup = (reason: Error) => {
      if (settled) return;
      settled = true;
      clearTimeout(retryTimer);
      reject(reason);
    };

    // Without an 'error' listener a failed spawn (e.g. binary not found)
    // would surface as an unhandled 'error' event. The listener stays
    // attached after startup so later process errors are logged, not thrown.
    serverProcess.on("error", (error) => {
      printMessage(`Service process error: ${error.message}`);
      abortStartup(new Error(`Failed to run llama-server: ${error.message}`));
    });

    // A dead process can never become healthy: stop polling, and make sure
    // the readiness flag does not outlive the process.
    serverProcess.on("exit", (code, signal) => {
      isReady = false;
      abortStartup(
        new Error(
          `llama-server exited before becoming ready (code: ${code}, signal: ${signal})`,
        ),
      );
    });

    const checkReady = async () => {
      if (settled) return;
      try {
        const response = await fetch(
          `http://${SERVER_HOST}:${SERVER_PORT}/health`,
        );
        const responseJson = (await response.json()) as {
          status: "ok" | string;
        };
        // The process may have died while the request was in flight.
        if (settled) return;
        if (responseJson.status === "ok") {
          settled = true;
          isReady = true;
          resolve();
          return;
        }
      } catch {
        // Server is not accepting connections (or not answering JSON) yet.
      }
      if (!settled) {
        retryTimer = setTimeout(checkReady, 100);
      }
    };

    void checkReady();
  });

  printMessage("Service ready!");
  return serverProcess;
}
/**
 * Terminates the reranker server process, if one was provided.
 *
 * @param serverProcess Process returned by `startRerankerService`, or `null`
 *   when no process is running (in which case nothing happens).
 */
export function stopRerankerService(serverProcess: ChildProcess | null) {
  serverProcess?.kill();
}
/**
 * Reports whether the reranker service is currently usable.
 *
 * Returns `false` right away when the service has not been marked ready.
 * Otherwise it queries the server's `/health` endpoint and returns `true`
 * only when the reported status is `"ok"`. Any request or parsing failure
 * counts as "not healthy".
 *
 * @returns `true` when the service is ready and healthy, `false` otherwise.
 */
export async function getRerankerStatus() {
  if (isReady) {
    const healthUrl = `http://${SERVER_HOST}:${SERVER_PORT}/health`;
    try {
      const healthResponse = await fetch(healthUrl);
      const payload = (await healthResponse.json()) as {
        status: "ok" | string;
      };
      return payload.status === "ok";
    } catch {
      return false;
    }
  }
  return false;
}
/**
 * Scores `documents` against `query` using the local reranker service.
 *
 * @param query Text the documents are ranked against.
 * @param documents Candidate texts; scores are requested for all of them
 *   (`top_n` is set to the number of documents).
 * @returns One entry per scored document, with the document's position in
 *   `documents` (`index`) and its `relevance_score`. Empty when `documents`
 *   is empty.
 * @throws If the service is not ready, the server answers with a non-OK
 *   status, or the response body does not contain a `results` array.
 */
export async function rerank(
  query: string,
  documents: string[],
): Promise<{ index: number; relevance_score: number }[]> {
  if (!isReady) {
    throw new Error("Reranker service is not ready");
  }

  // Nothing to score: skip the round-trip instead of sending an empty
  // `documents` array, which the server is expected to reject.
  if (documents.length === 0) {
    return [];
  }

  const response = await fetch(
    `http://${SERVER_HOST}:${SERVER_PORT}/v1/rerank`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "rerank",
        query,
        documents,
        top_n: documents.length,
      }),
    },
  );

  if (!response.ok) {
    throw new Error(`Reranking failed: ${response.statusText}`);
  }

  // The body is untrusted JSON: check its shape before handing it to
  // callers, so a malformed answer fails here with a clear message.
  const payload = (await response.json()) as { results?: unknown } | null;
  const results: unknown = payload?.results;
  if (!Array.isArray(results)) {
    throw new Error("Reranking failed: unexpected response format");
  }

  return results as { index: number; relevance_score: number }[];
}