easydel.inference.openai_api_modules

easydel.inference.openai_api_modules#

Defines Pydantic models for the vInference API, mimicking OpenAI’s structure.

class easydel.inference.openai_api_modules.ChatCompletionRequest(*, model: str, messages: List[ChatMessage], max_tokens: int = 16, presence_penalty: float = 0.0, frequency_penalty: float = 0.0, repetition_penalty: float = 1.0, temperature: float = 0.7, top_p: float = 1.0, top_k: int = 0, min_p: float = 0.0, suppress_tokens: List[int] = <factory>, functions: Optional[List[FunctionDefinition]] = None, function_call: Optional[Union[str, Dict[str, Any]]] = None, tools: Optional[List[ToolDefinition]] = None, tool_choice: Optional[Union[str, Dict[str, Any]]] = None, n: Optional[int] = 1, stream: Optional[bool] = False, stop: Optional[Union[str, List[str]]] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None)[source]#

Bases: BaseModel

Represents a request to the chat completion endpoint. Mirrors the OpenAI ChatCompletion request structure.

frequency_penalty: float#

function_call: Optional[Union[str, Dict[str, Any]]]#

functions: Optional[List[FunctionDefinition]]#

logit_bias: Optional[Dict[str, float]]#

max_tokens: int#

messages: List[ChatMessage]#

min_p: float#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

n: Optional[int]#

presence_penalty: float#

repetition_penalty: float#

stop: Optional[Union[str, List[str]]]#

stream: Optional[bool]#

suppress_tokens: List[int]#

temperature: float#

tool_choice: Optional[Union[str, Dict[str, Any]]]#

tools: Optional[List[ToolDefinition]]#

top_k: int#

top_p: float#

user: Optional[str]#

class easydel.inference.openai_api_modules.ChatCompletionResponse(*, id: str = <factory>, object: str = 'chat.completion', created: int = <factory>, model: str, choices: List[ChatCompletionResponseChoice], usage: UsageInfo)[source]#

Bases: BaseModel

Represents a non-streaming response from the chat completion endpoint.

choices: List[ChatCompletionResponseChoice]#

created: int#

id: str#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

object: str#

usage: UsageInfo#

class easydel.inference.openai_api_modules.ChatCompletionResponseChoice(*, index: int, message: ChatMessage, finish_reason: Optional[Literal['stop', 'length', 'function_call']] = None)[source]#

Bases: BaseModel

Represents a single choice within a non-streaming chat completion response.

finish_reason: Optional[Literal['stop', 'length', 'function_call']]#

index: int#

message: ChatMessage#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class easydel.inference.openai_api_modules.ChatCompletionStreamResponse(*, id: str = <factory>, object: str = 'chat.completion.chunk', created: int = <factory>, model: str, choices: List[ChatCompletionStreamResponseChoice], usage: UsageInfo)[source]#

Bases: BaseModel

Represents a single chunk in a streaming response from the chat completion endpoint.

choices: List[ChatCompletionStreamResponseChoice]#

created: int#

id: str#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

object: str#

usage: UsageInfo#

class easydel.inference.openai_api_modules.ChatCompletionStreamResponseChoice(*, index: int, delta: DeltaMessage, finish_reason: Optional[Literal['stop', 'length', 'function_call']] = None)[source]#

Bases: BaseModel

Represents a single choice within a streaming chat completion response chunk.

delta: DeltaMessage#

finish_reason: Optional[Literal['stop', 'length', 'function_call']]#

index: int#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class easydel.inference.openai_api_modules.ChatMessage(*, role: str, content: Union[str, List[Mapping[str, str]]], name: Optional[str] = None, function_call: Optional[Dict[str, Any]] = None)[source]#

Bases: BaseModel

Represents a single message in a chat conversation.

content: Union[str, List[Mapping[str, str]]]#

function_call: Optional[Dict[str, Any]]#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

name: Optional[str]#

role: str#

class easydel.inference.openai_api_modules.CompletionLogprobs(*, tokens: List[str], token_logprobs: List[float], top_logprobs: Optional[List[Dict[str, float]]] = None, text_offset: Optional[List[int]] = None)[source]#

Bases: BaseModel

Log probabilities for token generation.

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

text_offset: Optional[List[int]]#

token_logprobs: List[float]#

tokens: List[str]#

top_logprobs: Optional[List[Dict[str, float]]]#

class easydel.inference.openai_api_modules.CompletionRequest(*, model: str, prompt: Union[str, List[str]], max_tokens: int = 16, presence_penalty: float = 0.0, frequency_penalty: float = 0.0, repetition_penalty: float = 1.0, temperature: float = 0.7, top_p: float = 1.0, top_k: int = 0, min_p: float = 0.0, suppress_tokens: List[int] = <factory>, n: Optional[int] = 1, stream: Optional[bool] = False, stop: Optional[Union[str, List[str]]] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None)[source]#

Bases: BaseModel

Represents a request to the completions endpoint. Mirrors the OpenAI Completion request structure.

frequency_penalty: float#

logit_bias: Optional[Dict[str, float]]#

max_tokens: int#

min_p: float#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

n: Optional[int]#

presence_penalty: float#

prompt: Union[str, List[str]]#

repetition_penalty: float#

stop: Optional[Union[str, List[str]]]#

stream: Optional[bool]#

suppress_tokens: List[int]#

temperature: float#

top_k: int#

top_p: float#

user: Optional[str]#

class easydel.inference.openai_api_modules.CompletionResponse(*, id: str = <factory>, object: str = 'text_completion', created: int = <factory>, model: str, choices: List[CompletionResponseChoice], usage: UsageInfo)[source]#

Bases: BaseModel

Represents a response from the completions endpoint.

choices: List[CompletionResponseChoice]#

created: int#

id: str#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

object: str#

usage: UsageInfo#

class easydel.inference.openai_api_modules.CompletionResponseChoice(*, text: str, index: int, logprobs: Optional[CompletionLogprobs] = None, finish_reason: Optional[Literal['stop', 'length']] = None)[source]#

Bases: BaseModel

Represents a single choice within a completion response.

finish_reason: Optional[Literal['stop', 'length']]#

index: int#

logprobs: Optional[CompletionLogprobs]#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

text: str#

class easydel.inference.openai_api_modules.CompletionStreamResponse(*, id: str = <factory>, object: str = 'text_completion.chunk', created: int = <factory>, model: str, choices: List[CompletionStreamResponseChoice], usage: Optional[UsageInfo] = None)[source]#

Bases: BaseModel

Represents a streaming response from the completions endpoint.

choices: List[CompletionStreamResponseChoice]#

created: int#

id: str#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

object: str#

usage: Optional[UsageInfo]#

class easydel.inference.openai_api_modules.CompletionStreamResponseChoice(*, index: int, text: str, logprobs: Optional[CompletionLogprobs] = None, finish_reason: Optional[Literal['stop', 'length']] = None)[source]#

Bases: BaseModel

Represents a single choice within a streaming completion response chunk.

finish_reason: Optional[Literal['stop', 'length']]#

index: int#

logprobs: Optional[CompletionLogprobs]#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

text: str#

class easydel.inference.openai_api_modules.CountTokenRequest(*, model: str, conversation: Union[str, List[ChatMessage]])[source]#

Bases: BaseModel

Represents a request to the token counting endpoint.

conversation: Union[str, List[ChatMessage]]#

model: str#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class easydel.inference.openai_api_modules.DeltaMessage(*, role: Optional[str] = None, content: Optional[Union[str, List[Mapping[str, str]]]] = None, function_call: Optional[Dict[str, Any]] = None)[source]#

Bases: BaseModel

Represents a change (delta) in a chat message, used in streaming responses.

content: Optional[Union[str, List[Mapping[str, str]]]]#

function_call: Optional[Dict[str, Any]]#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

role: Optional[str]#

class easydel.inference.openai_api_modules.FunctionDefinition(*, name: str, description: Optional[str] = None, parameters: Dict[str, Any] = <factory>, required: Optional[List[str]] = None)[source]#

Bases: BaseModel

Defines a function that can be called by the model.

description: Optional[str]#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

name: str#

parameters: Dict[str, Any]#

required: Optional[List[str]]#

class easydel.inference.openai_api_modules.ToolDefinition(*, type: str = 'function', function: FunctionDefinition)[source]#

Bases: BaseModel

Defines a tool that can be called by the model.

function: FunctionDefinition#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

type: str#

class easydel.inference.openai_api_modules.UsageInfo(*, prompt_tokens: int = 0, completion_tokens: Optional[int] = 0, total_tokens: int = 0, tokens_per_second: float = 0, processing_time: float = 0.0)[source]#

Bases: BaseModel

Provides information about token usage and processing time for a request.

completion_tokens: Optional[int]#

model_config: ClassVar[ConfigDict] = {}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

processing_time: float#

prompt_tokens: int#

tokens_per_second: float#

total_tokens: int#

easydel.inference.openai_api_modules

Contents

easydel.inference.openai_api_modules#