import { DefaultSystem } from '../utils/constant';
import { IChatInputMessage, IStreamHandler } from '../interface';
import { BaseChat } from './base/base';
import { LMStudioClient } from '@lmstudio/sdk';
// Address of the local LM Studio server (WebSocket API, default port 1234).
const host = process.env.LMSTUDIO_HOST || 'localhost:1234';
const lmstudioClient = new LMStudioClient({
  baseUrl: `ws://${host}`
});
/**
 * Run large language models locally with LM Studio.
 */
export class LMStudioChat implements BaseChat {
  public platform = 'lmstudio';

  public async chat(
    messages: IChatInputMessage[],
    model = 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF',
    system = DefaultSystem
  ): Promise<string | null> {
    // Prepend the system prompt, if any, as the first message.
    if (system) {
      messages.unshift({
        role: 'system',
        content: system
      });
    }
    // Load the requested model in LM Studio, then request a single completion.
    const llm = await lmstudioClient.llm.load(model);
    const response = await llm.respond(messages);
    return response.content;
  }

  public async chatStream(
    messages: IChatInputMessage[],
    onMessage: IStreamHandler,
    model = 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF',
    system = DefaultSystem
  ): Promise<void> {
    // Prepend the system prompt, if any, as the first message.
    if (system) {
      messages.unshift({
        role: 'system',
        content: system
      });
    }
    const llm = await lmstudioClient.llm.load(model);
    // The prediction is async-iterable; forward each streamed fragment to the handler.
    const response = llm.respond(messages);
    for await (const chunk of response) {
      onMessage?.(chunk, false);
    }
    // Signal completion with a null payload.
    onMessage?.(null, true);
  }

  public async list() {
    // Only models currently loaded in LM Studio are reported.
    const models = await lmstudioClient.llm.listLoaded();
    if (models.length === 0) throw new Error('No models loaded.');
    return {
      models: models.map((x: any) => ({
        name: x.identifier
      }))
    };
  }
}
export const lmstudio = new LMStudioChat();
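// Usage sketch (illustrative only): assumes an LM Studio server is running locally
// and that the referenced default model is available to load.
//
//   const answer = await lmstudio.chat([
//     { role: 'user', content: 'Hello, who are you?' }
//   ]);
//
//   await lmstudio.chatStream(
//     [{ role: 'user', content: 'Stream a short poem.' }],
//     (chunk, done) => {
//       if (!done) process.stdout.write(String(chunk));
//     }
//   );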