|
package backend |
|
|
|
import ( |
|
"math/rand" |
|
"os" |
|
"path/filepath" |
|
|
|
"github.com/mudler/LocalAI/core/config" |
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto" |
|
"github.com/mudler/LocalAI/pkg/model" |
|
"github.com/rs/zerolog/log" |
|
) |
|
|
|
func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { |
|
if so.SingleBackend { |
|
opts = append(opts, model.WithSingleActiveBackend()) |
|
} |
|
|
|
if so.ParallelBackendRequests { |
|
opts = append(opts, model.EnableParallelRequests) |
|
} |
|
|
|
if c.GRPC.Attempts != 0 { |
|
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts)) |
|
} |
|
|
|
if c.GRPC.AttemptsSleepTime != 0 { |
|
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) |
|
} |
|
|
|
for k, v := range so.ExternalGRPCBackends { |
|
opts = append(opts, model.WithExternalBackend(k, v)) |
|
} |
|
|
|
return opts |
|
} |
|
|
|
func getSeed(c config.BackendConfig) int32 { |
|
seed := int32(*c.Seed) |
|
if seed == config.RAND_SEED { |
|
seed = rand.Int31() |
|
} |
|
|
|
return seed |
|
} |
|
|
|
func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { |
|
b := 512 |
|
if c.Batch != 0 { |
|
b = c.Batch |
|
} |
|
return &pb.ModelOptions{ |
|
CUDA: c.CUDA || c.Diffusers.CUDA, |
|
SchedulerType: c.Diffusers.SchedulerType, |
|
PipelineType: c.Diffusers.PipelineType, |
|
CFGScale: c.Diffusers.CFGScale, |
|
LoraAdapter: c.LoraAdapter, |
|
LoraScale: c.LoraScale, |
|
F16Memory: *c.F16, |
|
LoraBase: c.LoraBase, |
|
IMG2IMG: c.Diffusers.IMG2IMG, |
|
CLIPModel: c.Diffusers.ClipModel, |
|
CLIPSubfolder: c.Diffusers.ClipSubFolder, |
|
CLIPSkip: int32(c.Diffusers.ClipSkip), |
|
ControlNet: c.Diffusers.ControlNet, |
|
ContextSize: int32(*c.ContextSize), |
|
Seed: getSeed(c), |
|
NBatch: int32(b), |
|
NoMulMatQ: c.NoMulMatQ, |
|
DraftModel: c.DraftModel, |
|
AudioPath: c.VallE.AudioPath, |
|
Quantization: c.Quantization, |
|
GPUMemoryUtilization: c.GPUMemoryUtilization, |
|
TrustRemoteCode: c.TrustRemoteCode, |
|
EnforceEager: c.EnforceEager, |
|
SwapSpace: int32(c.SwapSpace), |
|
MaxModelLen: int32(c.MaxModelLen), |
|
TensorParallelSize: int32(c.TensorParallelSize), |
|
MMProj: c.MMProj, |
|
FlashAttention: c.FlashAttention, |
|
NoKVOffload: c.NoKVOffloading, |
|
YarnExtFactor: c.YarnExtFactor, |
|
YarnAttnFactor: c.YarnAttnFactor, |
|
YarnBetaFast: c.YarnBetaFast, |
|
YarnBetaSlow: c.YarnBetaSlow, |
|
NGQA: c.NGQA, |
|
RMSNormEps: c.RMSNormEps, |
|
MLock: *c.MMlock, |
|
RopeFreqBase: c.RopeFreqBase, |
|
RopeScaling: c.RopeScaling, |
|
Type: c.ModelType, |
|
RopeFreqScale: c.RopeFreqScale, |
|
NUMA: c.NUMA, |
|
Embeddings: *c.Embeddings, |
|
LowVRAM: *c.LowVRAM, |
|
NGPULayers: int32(*c.NGPULayers), |
|
MMap: *c.MMap, |
|
MainGPU: c.MainGPU, |
|
Threads: int32(*c.Threads), |
|
TensorSplit: c.TensorSplit, |
|
|
|
ModelBaseName: c.AutoGPTQ.ModelBaseName, |
|
Device: c.AutoGPTQ.Device, |
|
UseTriton: c.AutoGPTQ.Triton, |
|
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer, |
|
|
|
Tokenizer: c.Tokenizer, |
|
} |
|
} |
|
|
|
func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions { |
|
promptCachePath := "" |
|
if c.PromptCachePath != "" { |
|
p := filepath.Join(modelPath, c.PromptCachePath) |
|
err := os.MkdirAll(filepath.Dir(p), 0750) |
|
if err == nil { |
|
promptCachePath = p |
|
} else { |
|
log.Error().Err(err).Str("promptCachePath", promptCachePath).Msg("error creating prompt cache folder") |
|
} |
|
} |
|
|
|
return &pb.PredictOptions{ |
|
Temperature: float32(*c.Temperature), |
|
TopP: float32(*c.TopP), |
|
NDraft: c.NDraft, |
|
TopK: int32(*c.TopK), |
|
Tokens: int32(*c.Maxtokens), |
|
Threads: int32(*c.Threads), |
|
PromptCacheAll: c.PromptCacheAll, |
|
PromptCacheRO: c.PromptCacheRO, |
|
PromptCachePath: promptCachePath, |
|
F16KV: *c.F16, |
|
DebugMode: *c.Debug, |
|
Grammar: c.Grammar, |
|
NegativePromptScale: c.NegativePromptScale, |
|
RopeFreqBase: c.RopeFreqBase, |
|
RopeFreqScale: c.RopeFreqScale, |
|
NegativePrompt: c.NegativePrompt, |
|
Mirostat: int32(*c.LLMConfig.Mirostat), |
|
MirostatETA: float32(*c.LLMConfig.MirostatETA), |
|
MirostatTAU: float32(*c.LLMConfig.MirostatTAU), |
|
Debug: *c.Debug, |
|
StopPrompts: c.StopWords, |
|
Repeat: int32(c.RepeatLastN), |
|
FrequencyPenalty: float32(c.FrequencyPenalty), |
|
PresencePenalty: float32(c.PresencePenalty), |
|
Penalty: float32(c.RepeatPenalty), |
|
NKeep: int32(c.Keep), |
|
Batch: int32(c.Batch), |
|
IgnoreEOS: c.IgnoreEOS, |
|
Seed: getSeed(c), |
|
MLock: *c.MMlock, |
|
MMap: *c.MMap, |
|
MainGPU: c.MainGPU, |
|
TensorSplit: c.TensorSplit, |
|
TailFreeSamplingZ: float32(*c.TFZ), |
|
TypicalP: float32(*c.TypicalP), |
|
} |
|
} |
|
|