feat: track internally started models by ID #3693

Merged · 13 commits · Oct 2, 2024
11 changes: 1 addition & 10 deletions core/backend/embeddings.go
@@ -10,20 +10,11 @@ import (
)

func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
modelFile := backendConfig.Model

grpcOpts := GRPCModelOpts(backendConfig)

var inferenceModel interface{}
var err error

opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(*backendConfig.Threads)),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
})
opts := ModelOptions(backendConfig, appConfig, []model.Option{})

if backendConfig.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
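
Every call site that previously spelled out the loader boilerplate now passes an empty option slice and lets ModelOptions fill in the rest. Roughly, the empty-slice call above expands to the defaults below; this is a sketch using only option constructors that appear elsewhere in this diff, not the literal implementation (the authoritative version is in core/backend/options.go further down):

	// Approximate expansion of ModelOptions(backendConfig, appConfig, nil) — a sketch.
	opts := []model.Option{
		model.WithBackendString(backendConfig.Backend),
		model.WithModel(backendConfig.Model),
		model.WithAssetDir(appConfig.AssetsDestination),
		model.WithContext(appConfig.Context),
		model.WithModelID(name), // name falls back from the config's Name to its Model
		model.WithLoadGRPCLoadModelOpts(grpcModelOpts(backendConfig)),
	}
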
15 changes: 2 additions & 13 deletions core/backend/image.go
@@ -8,19 +8,8 @@ import (
)

func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
threads := backendConfig.Threads
if *threads == 0 && appConfig.Threads != 0 {
threads = &appConfig.Threads
}
gRPCOpts := GRPCModelOpts(backendConfig)
opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(backendConfig.Backend),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithThreads(uint32(*threads)),
model.WithContext(appConfig.Context),
model.WithModel(backendConfig.Model),
model.WithLoadGRPCLoadModelOpts(gRPCOpts),
})

opts := ModelOptions(backendConfig, appConfig, []model.Option{})

inferenceModel, err := loader.BackendLoader(
opts...,
13 changes: 1 addition & 12 deletions core/backend/llm.go
@@ -33,22 +33,11 @@ type TokenUsage struct {

func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
threads := c.Threads
if *threads == 0 && o.Threads != 0 {
threads = &o.Threads
}
grpcOpts := GRPCModelOpts(c)

var inferenceModel grpc.Backend
var err error

opts := modelOpts(c, o, []model.Option{
model.WithLoadGRPCLoadModelOpts(grpcOpts),
model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
model.WithAssetDir(o.AssetsDestination),
model.WithModel(modelFile),
model.WithContext(o.Context),
})
opts := ModelOptions(c, o, []model.Option{})

if c.Backend != "" {
opts = append(opts, model.WithBackendString(c.Backend))
101 changes: 85 additions & 16 deletions core/backend/options.go
@@ -11,59 +11,128 @@ import (
"github.com/rs/zerolog/log"
)

func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
name := c.Name
if name == "" {
name = c.Model
}

defOpts := []model.Option{
model.WithBackendString(c.Backend),
model.WithModel(c.Model),
model.WithAssetDir(so.AssetsDestination),
model.WithContext(so.Context),
model.WithModelID(name),
}

threads := 1

if c.Threads != nil {
threads = *c.Threads
}

if so.Threads != 0 {
threads = so.Threads
}

c.Threads = &threads

grpcOpts := grpcModelOpts(c)
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))

if so.SingleBackend {
opts = append(opts, model.WithSingleActiveBackend())
defOpts = append(defOpts, model.WithSingleActiveBackend())
}

if so.ParallelBackendRequests {
opts = append(opts, model.EnableParallelRequests)
defOpts = append(defOpts, model.EnableParallelRequests)
}

if c.GRPC.Attempts != 0 {
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts))
}

if c.GRPC.AttemptsSleepTime != 0 {
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
}

for k, v := range so.ExternalGRPCBackends {
opts = append(opts, model.WithExternalBackend(k, v))
defOpts = append(defOpts, model.WithExternalBackend(k, v))
}

return opts
return append(defOpts, opts...)
}
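
Two details worth noting in ModelOptions. First, the new WithModelID(name) default is what lets the loader track internally started models by ID, with name falling back from the config's Name to its Model. Second, the defaults are prepended (append(defOpts, opts...)), so caller-supplied options are applied after them and can override a default. A hypothetical override, assuming the loader applies options in order so later ones win; the backend name below is an assumption:

	// Caller options land after the defaults, so the explicit backend
	// string here would win over the one taken from the config.
	opts := ModelOptions(backendConfig, appConfig, []model.Option{
		model.WithBackendString("llama-cpp"),
	})
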

func getSeed(c config.BackendConfig) int32 {
seed := int32(*c.Seed)
var seed int32 = config.RAND_SEED

if c.Seed != nil {
seed = int32(*c.Seed)
}

if seed == config.RAND_SEED {
seed = rand.Int31()
}

return seed
}
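
The old getSeed dereferenced c.Seed unconditionally and could panic on a nil pointer; the rewrite treats both a nil seed and the RAND_SEED sentinel as a request for a random seed. A minimal in-package sketch, assuming Seed is reachable on the config as an *int:

	fixed := 42 // hypothetical fixed seed
	var c config.BackendConfig
	c.Seed = &fixed
	fmt.Println(getSeed(c)) // prints 42

	c.Seed = nil
	fmt.Println(getSeed(c)) // prints a seed drawn from rand.Int31()
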

func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512
if c.Batch != 0 {
b = c.Batch
}

f16 := false
if c.F16 != nil {
f16 = *c.F16
}

embeddings := false
if c.Embeddings != nil {
embeddings = *c.Embeddings
}

lowVRAM := false
if c.LowVRAM != nil {
lowVRAM = *c.LowVRAM
}

mmap := false
if c.MMap != nil {
mmap = *c.MMap
}

ctxSize := 1024
if c.ContextSize != nil {
ctxSize = *c.ContextSize
}

mmlock := false
if c.MMlock != nil {
mmlock = *c.MMlock
}

nGPULayers := 9999999
if c.NGPULayers != nil {
nGPULayers = *c.NGPULayers
}

return &pb.ModelOptions{
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.Diffusers.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
F16Memory: *c.F16,
F16Memory: f16,
LoraBase: c.LoraBase,
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
ContextSize: int32(*c.ContextSize),
ContextSize: int32(ctxSize),
Seed: getSeed(c),
NBatch: int32(b),
NoMulMatQ: c.NoMulMatQ,
@@ -85,16 +154,16 @@ func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
YarnBetaSlow: c.YarnBetaSlow,
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
MLock: *c.MMlock,
MLock: mmlock,
RopeFreqBase: c.RopeFreqBase,
RopeScaling: c.RopeScaling,
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: *c.Embeddings,
LowVRAM: *c.LowVRAM,
NGPULayers: int32(*c.NGPULayers),
MMap: *c.MMap,
Embeddings: embeddings,
LowVRAM: lowVRAM,
NGPULayers: int32(nGPULayers),
MMap: mmap,
MainGPU: c.MainGPU,
Threads: int32(*c.Threads),
TensorSplit: c.TensorSplit,
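
The nil guards above all share one shape: dereference the pointer when it is set, otherwise fall back to a default. Under the assumption of Go 1.18+ generics, they could collapse into a single helper; a sketch, not part of this PR:

	// Hypothetical helper, not in the PR: return *p when p is non-nil,
	// otherwise the supplied default.
	func derefOr[T any](p *T, def T) T {
		if p != nil {
			return *p
		}
		return def
	}

	// The guards above would then read, e.g.:
	//   f16 := derefOr(c.F16, false)
	//   ctxSize := derefOr(c.ContextSize, 1024)
	//   nGPULayers := derefOr(c.NGPULayers, 9999999)
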
16 changes: 2 additions & 14 deletions core/backend/rerank.go
@@ -9,21 +9,9 @@ import (
model "github.com/mudler/LocalAI/pkg/model"
)

func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
bb := backend
if bb == "" {
return nil, fmt.Errorf("backend is required")
}

grpcOpts := GRPCModelOpts(backendConfig)
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {

opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
rerankModel, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
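
Rerank (and, below, SoundGeneration and TokenMetrics) no longer takes the backend as a separate parameter, and the old backend == "" early return is gone: the backend name now travels inside backendConfig, as the CLI change further down illustrates. A hypothetical direct caller; the backend and model names are assumptions for illustration:

	cfg := config.BackendConfig{}
	cfg.SetDefaults()
	cfg.Backend = "rerankers" // assumption: a rerank-capable backend name

	result, err := backend.Rerank("my-reranker-model", request, ml, appConfig, cfg)
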
13 changes: 1 addition & 12 deletions core/backend/soundgeneration.go
@@ -13,7 +13,6 @@ import (
)

func SoundGeneration(
backend string,
modelFile string,
text string,
duration *float32,
@@ -25,18 +24,8 @@
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
if backend == "" {
return "", nil, fmt.Errorf("backend is a required parameter")
}

grpcOpts := GRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(backend),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})

soundGenModel, err := loader.BackendLoader(opts...)
if err != nil {
13 changes: 1 addition & 12 deletions core/backend/token_metrics.go
@@ -10,24 +10,13 @@ import (
)

func TokenMetrics(
backend,
modelFile string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
bb := backend
if bb == "" {
return nil, fmt.Errorf("backend is required")
}

grpcOpts := GRPCModelOpts(backendConfig)

opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
opts := ModelOptions(backendConfig, appConfig, []model.Option{
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
model, err := loader.BackendLoader(opts...)
if err != nil {
12 changes: 5 additions & 7 deletions core/backend/transcript.go
@@ -14,13 +14,11 @@ import (

func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {

opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(model.WhisperBackend),
model.WithModel(backendConfig.Model),
model.WithContext(appConfig.Context),
model.WithThreads(uint32(*backendConfig.Threads)),
model.WithAssetDir(appConfig.AssetsDestination),
})
if backendConfig.Backend == "" {
backendConfig.Backend = model.WhisperBackend
}

opts := ModelOptions(backendConfig, appConfig, []model.Option{})

transcriptionModel, err := ml.BackendLoader(opts...)
if err != nil {
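
Setting backendConfig.Backend before calling ModelOptions is what makes the Whisper default stick, because ModelOptions derives its WithBackendString default from that field. Passing the backend as an explicit option should be equivalent, assuming later options override earlier defaults; a sketch under that assumption:

	// Equivalent alternative, sketched:
	opts := ModelOptions(backendConfig, appConfig, []model.Option{
		model.WithBackendString(model.WhisperBackend),
	})
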
7 changes: 1 addition & 6 deletions core/backend/tts.go
@@ -28,14 +28,9 @@ func ModelTTS(
bb = model.PiperBackend
}

grpcOpts := GRPCModelOpts(backendConfig)

opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
ttsModel, err := loader.BackendLoader(opts...)
if err != nil {
3 changes: 2 additions & 1 deletion core/cli/soundgeneration.go
@@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {

options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend

var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}

filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
filePath, _, err := backend.SoundGeneration(t.Model, text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)

2 changes: 1 addition & 1 deletion core/http/endpoints/elevenlabs/soundgeneration.go
@@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
}

// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}
4 changes: 2 additions & 2 deletions core/http/endpoints/jina/rerank.go
@@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)

if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}

log.Debug().Msgf("Request for model: %s", modelFile)

if input.Backend != "" {
@@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
Documents: req.Documents,
}

results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
if err != nil {
return err
}