// NOTE: "Spaces: / Configuration error / Configuration error" was web-page residue captured during extraction; it is not part of the schema.
// Schema for the `backend` package: the Backend service and the request and
// response messages its RPCs exchange.
syntax = "proto3";

package backend;

// Code-generation targets. These values are part of the published contract;
// changing them moves the generated code to a different package or class.
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
option java_multiple_files = true;
option java_outer_classname = "LocalAIBackend";
option java_package = "io.skynet.localai.backend";
// Backend is the RPC surface of the `backend` package.
//
// Every method is unary except PredictStream, which returns a stream of Reply
// messages. Several methods share message types: PredictOptions is the request
// of Predict, PredictStream, Embedding and TokenizeString; HealthMessage is the
// request of Health and Status; Result is the response of LoadModel,
// GenerateImage, TTS, SoundGeneration, StoresSet and StoresDelete.
service Backend {
  rpc Health(HealthMessage) returns (Reply) {}
  rpc Predict(PredictOptions) returns (Reply) {}
  rpc LoadModel(ModelOptions) returns (Result) {}
  // Server-streaming variant of Predict.
  rpc PredictStream(PredictOptions) returns (stream Reply) {}
  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
  rpc TTS(TTSRequest) returns (Result) {}
  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
  rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
  rpc Status(HealthMessage) returns (StatusResponse) {}

  // Store operations; keys and values use StoresKey / StoresValue.
  rpc StoresSet(StoresSetOptions) returns (Result) {}
  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
  rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

  rpc Rerank(RerankRequest) returns (RerankResult) {}
  rpc GetMetrics(MetricsRequest) returns (MetricsResponse) {}
}
// Empty request of the GetMetrics RPC. It has no fields today; keeping a
// dedicated message lets fields be added later without changing the RPC
// signature.
message MetricsRequest {}
// Response of the GetMetrics RPC.
// NOTE(review): the meaning of each counter (per-slot vs. cumulative, reset
// behaviour) is not stated in this file — confirm against the backend.
message MetricsResponse {
  int32 slot_id = 1;
  // Stores the prompt as a JSON string.
  string prompt_json_for_slot = 2;
  float tokens_per_second = 3;
  int32 tokens_generated = 4;
  int32 prompt_tokens_processed = 5;
}
// Request of the Rerank RPC: a query string plus a list of document strings.
message RerankRequest {
  string query = 1;
  repeated string documents = 2;
  // NOTE(review): presumably limits how many results are returned; the
  // handling of 0 (the proto3 default) is not defined here — confirm.
  int32 top_n = 3;
}
// Response of the Rerank RPC: token usage plus a list of per-document results.
message RerankResult {
  Usage usage = 1;
  repeated DocumentResult results = 2;
}
// Token counts reported in RerankResult.usage.
message Usage {
  int32 total_tokens = 1;
  int32 prompt_tokens = 2;
}
// One element of RerankResult.results.
message DocumentResult {
  // NOTE(review): presumably the position of the document within
  // RerankRequest.documents — confirm.
  int32 index = 1;
  string text = 2;
  float relevance_score = 3;
}
// A store key, carried as a list of floats. Used by all Stores* request and
// result messages.
message StoresKey {
  repeated float Floats = 1;
}
// A store value, carried as opaque bytes.
message StoresValue {
  bytes Bytes = 1;
}
// Request of the StoresSet RPC.
// NOTE(review): Keys and Values are two parallel lists, presumably paired by
// index (Keys[i] -> Values[i]); nothing in the schema enforces equal lengths —
// confirm how the backend treats a mismatch.
message StoresSetOptions {
  repeated StoresKey Keys = 1;
  repeated StoresValue Values = 2;
}
// Request of the StoresDelete RPC: the list of keys to act on.
message StoresDeleteOptions {
  repeated StoresKey Keys = 1;
}
// Request of the StoresGet RPC: the list of keys to look up.
message StoresGetOptions {
  repeated StoresKey Keys = 1;
}
// Response of the StoresGet RPC.
// NOTE(review): Keys and Values are parallel lists, presumably paired by
// index — confirm.
message StoresGetResult {
  repeated StoresKey Keys = 1;
  repeated StoresValue Values = 2;
}
// Request of the StoresFind RPC: a single key and a TopK count.
message StoresFindOptions {
  StoresKey Key = 1;
  // NOTE(review): presumably the maximum number of matches to return —
  // confirm, including what 0 (the proto3 default) means.
  int32 TopK = 2;
}
// Response of the StoresFind RPC.
// NOTE(review): Keys, Values and Similarities are three parallel lists,
// presumably aligned by index — confirm.
message StoresFindResult {
  repeated StoresKey Keys = 1;
  repeated StoresValue Values = 2;
  repeated float Similarities = 3;
}
// Empty request shared by the Health and Status RPCs.
message HealthMessage {}
// Per-request options. This message is the request type of the Predict,
// PredictStream, Embedding and TokenizeString RPCs.
//
// All scalar fields have proto3 implicit presence: an unset field and a field
// set to 0 / "" / false are indistinguishable on the wire.
//
// NOTE(review): field number 24 is not used in this message. If it belonged to
// a field that was removed, consider adding `reserved 24;` so it is never
// reused with a different meaning.
message PredictOptions {
  string Prompt = 1;
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;
  float Temperature = 9;
  float Penalty = 10;
  bool F16KV = 11;
  bool DebugMode = 12;
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;
  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;
  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;
  bool PenalizeNL = 22;
  string LogitBias = 23;
  // (24 is skipped — see the note on the message.)
  bool MLock = 25;
  bool MMap = 26;
  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;
  string Grammar = 29;
  string MainGPU = 30;
  string TensorSplit = 31;
  float TopP = 32;
  string PromptCachePath = 33;
  bool Debug = 34;
  repeated int32 EmbeddingTokens = 35;
  string Embeddings = 36;
  float RopeFreqBase = 37;
  float RopeFreqScale = 38;
  float NegativePromptScale = 39;
  string NegativePrompt = 40;
  int32 NDraft = 41;
  // NOTE(review): the encoding of the Images/Videos/Audios strings (path, URL,
  // base64, ...) is not specified in this file — confirm with the backend.
  repeated string Images = 42;
  bool UseTokenizerTemplate = 43;
  repeated Message Messages = 44;
  repeated string Videos = 45;
  repeated string Audios = 46;
  string CorrelationId = 47;
}
// The response message containing the result. Returned by Health and Predict,
// and streamed by PredictStream.
message Reply {
  // Payload as raw bytes (not constrained to valid UTF-8 by the schema).
  bytes message = 1;
  int32 tokens = 2;
  int32 prompt_tokens = 3;
}
// Request of the LoadModel RPC.
//
// Fields are grouped by the backend they relate to (see the section comments)
// rather than by field number, so the numbers are not monotonically
// increasing. Numbers 1 through 61 are all in use; the next free number is 62.
// Never renumber existing fields: the number, not the name, identifies a field
// on the wire.
message ModelOptions {
  string Model = 1;
  int32 ContextSize = 2;
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
  string LibrarySearchPath = 16;
  float RopeFreqBase = 17;
  float RopeFreqScale = 18;
  float RMSNormEps = 19;
  int32 NGQA = 20;
  string ModelFile = 21;

  // AutoGPTQ
  string Device = 22;
  bool UseTriton = 23;
  string ModelBaseName = 24;
  bool UseFastTokenizer = 25;

  // Diffusers
  string PipelineType = 26;
  string SchedulerType = 27;
  bool CUDA = 28;
  float CFGScale = 29;
  bool IMG2IMG = 30;
  string CLIPModel = 31;
  string CLIPSubfolder = 32;
  int32 CLIPSkip = 33;
  string ControlNet = 48;
  string Tokenizer = 34;

  // LLM (llama.cpp)
  string LoraBase = 35;
  string LoraAdapter = 36;
  float LoraScale = 42;
  bool NoMulMatQ = 37;
  string DraftModel = 39;
  string AudioPath = 38;

  // vllm
  string Quantization = 40;
  float GPUMemoryUtilization = 50;
  bool TrustRemoteCode = 51;
  bool EnforceEager = 52;
  int32 SwapSpace = 53;
  int32 MaxModelLen = 54;
  int32 TensorParallelSize = 55;
  string LoadFormat = 58;

  string MMProj = 41;
  string RopeScaling = 43;
  float YarnExtFactor = 44;
  float YarnAttnFactor = 45;
  float YarnBetaFast = 46;
  float YarnBetaSlow = 47;
  string Type = 49;
  bool FlashAttention = 56;
  bool NoKVOffload = 57;
  string ModelPath = 59;

  // NOTE(review): LoraAdapters and LoraScales are parallel lists, presumably
  // paired by index; their relationship to the singular LoraAdapter (36) and
  // LoraScale (42) fields is not stated here — confirm.
  repeated string LoraAdapters = 60;
  repeated float LoraScales = 61;
}
// Generic outcome: a text message plus a success flag. Response type of the
// LoadModel, GenerateImage, TTS, SoundGeneration, StoresSet and StoresDelete
// RPCs.
message Result {
  string message = 1;
  // An unset flag reads as false (proto3 default).
  bool success = 2;
}
// Response of the Embedding RPC: a flat list of floats.
message EmbeddingResult {
  repeated float embeddings = 1;
}
// Request of the AudioTranscription RPC.
//
// NOTE(review): numbering starts at 2; field number 1 is not used here. If it
// belonged to a removed field, consider adding `reserved 1;` so it is never
// reused.
message TranscriptRequest {
  // NOTE(review): presumably a file path (the name mirrors `dst` in the other
  // request messages) — confirm whether it is the input or the output.
  string dst = 2;
  string language = 3;
  uint32 threads = 4;
  bool translate = 5;
}
// Response of the AudioTranscription RPC: a list of segments plus a text
// field.
message TranscriptResult {
  repeated TranscriptSegment segments = 1;
  string text = 2;
}
// One element of TranscriptResult.segments.
message TranscriptSegment {
  int32 id = 1;
  // NOTE(review): the unit of start/end (ms, ns, samples, ...) is not stated
  // in this file — confirm before interpreting these values.
  int64 start = 2;
  int64 end = 3;
  string text = 4;
  repeated int32 tokens = 5;
}
// Request of the GenerateImage RPC.
message GenerateImageRequest {
  int32 height = 1;
  int32 width = 2;
  int32 mode = 3;
  int32 step = 4;
  int32 seed = 5;
  string positive_prompt = 6;
  string negative_prompt = 7;
  // NOTE(review): dst/src are presumably output/input file paths — confirm.
  string dst = 8;
  string src = 9;

  // Diffusers
  string EnableParameters = 10;
  int32 CLIPSkip = 11;
}
// Request of the TTS RPC.
message TTSRequest {
  string text = 1;
  string model = 2;
  string dst = 3;
  string voice = 4;
  // Declared `optional`, so this field has explicit presence: a receiver can
  // tell "not set" apart from an empty string.
  optional string language = 5;
}
// Request of the SoundGeneration RPC.
//
// Fields 4-8 are declared `optional`, so they have explicit presence: a
// receiver can distinguish "not set" from the zero value.
message SoundGenerationRequest {
  string text = 1;
  string model = 2;
  string dst = 3;
  // NOTE(review): the unit of duration is not stated in this file — confirm.
  optional float duration = 4;
  optional float temperature = 5;
  optional bool sample = 6;
  optional string src = 7;
  optional int32 src_divisor = 8;
}
// Response of the TokenizeString RPC.
message TokenizationResponse {
  // NOTE(review): presumably the number of entries in `tokens` — confirm.
  int32 length = 1;
  repeated int32 tokens = 2;
}
// Memory usage figures reported in StatusResponse.memory.
// NOTE(review): the unit (presumably bytes) is not stated in this file —
// confirm.
message MemoryUsageData {
  uint64 total = 1;
  // Named breakdown of usage. Map iteration order is undefined on the wire,
  // so consumers must not rely on entry order.
  map<string, uint64> breakdown = 2;
}
// Response of the Status RPC: a state plus memory usage data.
message StatusResponse {
  // Backend state. UNINITIALIZED is the zero value, so it is what a receiver
  // sees when `state` is unset.
  enum State {
    UNINITIALIZED = 0;
    BUSY = 1;
    READY = 2;
    // Negative enum values are legal, but they are encoded as a 10-byte
    // varint on the wire.
    ERROR = -1;
  }

  State state = 1;
  MemoryUsageData memory = 2;
}
// A role/content pair; elements of PredictOptions.Messages.
message Message {
  string role = 1;
  string content = 2;
}