easydel.infra.elarge_model.types

Contents

easydel.infra.elarge_model.types#

Type definitions for ELM configuration system.

This module defines the TypedDict structures used throughout the ELM configuration system, providing type safety and documentation for configuration schemas.

class easydel.infra.elarge_model.types.BaseCfg[source]#

Bases: TypedDict

Base configuration values container.

values#

Dictionary of base configuration values that get passed to the model’s config during initialization

Type

easydel.infra.base_config.EasyDeLBaseConfigDict | dict[str, Any]

operation_configs#

ejkernel operation config overrides. Maps implementation names (e.g., “flash_attn2”, “ring”) to their config objects. When set, overrides ejkernel autotune.

Type

easydel.infra.elarge_model.types.OperationConfigsDict | None

operation_configs: easydel.infra.elarge_model.types.OperationConfigsDict | None#
class easydel.infra.elarge_model.types.DataMixtureCfg[source]#

Bases: TypedDict

Data mixture configuration for training/evaluation datasets.

informs#

List of dataset configurations (text or visual)

Type

list[easydel.infra.elarge_model.types.TextDatasetInformCfg | easydel.infra.elarge_model.types.VisualDatasetInformCfg]

cache_dir#

Directory for caching datasets (default: ~/.cache/easydel)

Type

str

streaming#

Whether to use streaming mode for large datasets (default: True)

Type

bool

text_target_field#

Target field name for text in unified dataset (default: “text”)

Type

str

image_target_field#

Target field name for images in unified dataset (default: “image”)

Type

str

batch_size#

Batch size for data loading (default: 1)

Type

int

shuffle_buffer_size#

Buffer size for shuffling in streaming mode (default: None)

Type

int | None

seed#

Random seed for shuffling and sampling (default: 42)

Type

int | None

# Token packing configuration
pack_tokens#

Enable pre-tokenized sequence packing (default: False)

Type

bool

tokens_field_name#

Field name containing token IDs (default: “tokens”)

Type

str

pack_seq_length#

Target sequence length for packing (default: None)

Type

int | None

pack_eos_token_id#

EOS token ID for padding/separation (default: 0)

Type

int

pack_shuffle#

Shuffle packed sequences (default: True)

Type

bool

pack_shuffle_buffer_factor#

Buffer size multiplier for shuffle (default: 16)

Type

int

dask_storage_options#

Storage options for dask/remote files (default: None)

Type

dict | None

# On-the-fly tokenization and packing
pack_on_the_fly#

Enable on-the-fly tokenization and packing (default: False)

Type

bool

tokenize_callback#

Function to tokenize examples, returns token IDs (default: None)

Type

Optional[Callable[[dict], list[int]]]

# Block-deterministic mixture configuration
block_mixture#

Use deterministic block mixing instead of standard interleave (default: True)

Type

bool

mixture_block_size#

Number of examples per block (default: 2048)

Type

int

stop_strategy#

Strategy when a dataset is exhausted - “restart” or “first_exhausted” (default: “restart”)

Type

str

mixture_weights#

Per-dataset weights as dict mapping dataset identifier to weight (default: None)

Type

dict[str, float] | None

# Tokenization configuration
tokenization#

Configuration for tokenizing the dataset (default: None)

Type

easydel.infra.elarge_model.types.TokenizationCfg | None

# Save configuration
save#

Configuration for saving the processed dataset (default: None)

Type

easydel.infra.elarge_model.types.DatasetSaveCfg | None

# ShardedDataSource configuration (new data pipeline)

use_sharded_source#

Use the new ShardedDataSource architecture (default: False). When True, builds a ShardedDataSource instead of an HF Dataset for more efficient streaming and lazy transforms.

Type

bool

# Legacy/deprecated attributes (kept for compatibility)

use_fast_loader#

Enable fast data loading with fsspec (deprecated)

Type

bool

num_workers#

Number of parallel workers for data loading (deprecated)

Type

int

prefetch_size#

Number of batches to prefetch (deprecated)

Type

int

enable_caching#

Enable dataset caching for faster reloads (deprecated)

Type

bool

batch_size: int#
block_mixture: bool#
cache_dir: str#
dask_storage_options: dict | None#
enable_caching: bool#
image_target_field: str#
informs: list[easydel.infra.elarge_model.types.TextDatasetInformCfg | easydel.infra.elarge_model.types.VisualDatasetInformCfg]#
mixture_block_size: int#
mixture_weights: dict[str, float] | None#
num_workers: int#
pack_eos_token_id: int#
pack_on_the_fly: bool#
pack_seq_length: int | None#
pack_shuffle: bool#
pack_shuffle_buffer_factor: int#
pack_tokens: bool#
prefetch_size: int#
save: easydel.infra.elarge_model.types.DatasetSaveCfg | None#
seed: int | None#
shuffle_buffer_size: int | None#
stop_strategy: str#
streaming: bool#
text_target_field: str#
tokenization: easydel.infra.elarge_model.types.TokenizationCfg | None#
tokenize_callback: Optional[Callable[[dict], list[int]]]#
tokens_field_name: str#
use_fast_loader: bool#
use_sharded_source: bool#
class easydel.infra.elarge_model.types.DatasetSaveCfg[source]#

Bases: TypedDict

Configuration for saving processed/tokenized datasets.

output_path#

Path to save the dataset (required)

Type

str

format#

Output format - “parquet”, “arrow”, “json”, “jsonl” (default: “parquet”)

Type

Literal['parquet', 'arrow', 'json', 'jsonl']

num_shards#

Number of shards to split the dataset into (default: None, auto)

Type

int | None

compression#

Compression algorithm - “snappy”, “gzip”, “zstd”, None (default: “snappy”)

Type

Optional[Literal['snappy', 'gzip', 'zstd']]

max_shard_size#

Maximum shard size in bytes or string like “500MB” (default: “500MB”)

Type

str | int

overwrite#

Whether to overwrite existing files (default: False)

Type

bool

push_to_hub#

Whether to push to HuggingFace Hub (default: False)

Type

bool

hub_repo_id#

HuggingFace Hub repository ID (required if push_to_hub=True)

Type

str | None

hub_private#

Whether to make the Hub repository private (default: False)

Type

bool

hub_token#

HuggingFace token for authentication (default: None, use env)

Type

str | None

compression: Optional[Literal['snappy', 'gzip', 'zstd']]#
format: Literal['parquet', 'arrow', 'json', 'jsonl']#
hub_private: bool#
hub_repo_id: str | None#
hub_token: str | None#
max_shard_size: str | int#
num_shards: int | None#
output_path: str#
overwrite: bool#
push_to_hub: bool#
class easydel.infra.elarge_model.types.ELMConfig[source]#

Bases: TypedDict

Complete ELM configuration structure.

This is the top-level configuration type that combines all configuration sections for model loading, sharding, quantization, inference, training, and data.

model#

Model configuration (required)

Type

easydel.infra.elarge_model.types.ModelCfg

teacher_model#

Teacher model configuration for distillation training

Type

easydel.infra.elarge_model.types.ModelCfg

reference_model#

Reference model configuration for preference optimization (DPO, etc.)

Type

easydel.infra.elarge_model.types.ModelCfg

loader#

Model loading configuration

Type

easydel.infra.elarge_model.types.LoaderCfg

sharding#

Distributed sharding configuration

Type

easydel.infra.elarge_model.types.ShardingCfg

platform#

Platform and backend configuration

Type

easydel.infra.elarge_model.types.PlatformCfg

quantization#

Quantization configuration

Type

easydel.infra.elarge_model.types.QuantizationCfg

base_config#

Base model configuration values

Type

easydel.infra.elarge_model.types.BaseCfg

mixture#

Data mixture configuration for training/evaluation datasets

Type

easydel.infra.elarge_model.types.DataMixtureCfg

esurge#

eSurge inference engine configuration

Type

easydel.infra.elarge_model.types.eSurgeCfg

trainer#

Training configuration

Type

easydel.infra.elarge_model.trainer_types.TrainerConfig

eval#

Evaluation configuration for lm-evaluation-harness

Type

easydel.infra.elarge_model.types.EvalKwargs

Example

>>> # Basic configuration
>>> config: ELMConfig = {
...     "model": {"name_or_path": "meta-llama/Llama-2-7b"},
...     "loader": {"dtype": "bf16"},
...     "sharding": {"axis_dims": (1, 1, 1, -1, 1)},
...     "mixture": {
...         "informs": [
...             {"type": "json", "data_files": "train.json", "content_field": "text"},
...             {"type": "parquet", "data_files": "valid/*.parquet", "content_field": "content"}
...         ],
...         "batch_size": 32
...     }
... }
>>>
>>> # Advanced configuration with distillation, DPO, and token packing
>>> config: ELMConfig = {
...     "model": {"name_or_path": "meta-llama/Llama-2-7b"},
...     "teacher_model": {"name_or_path": "meta-llama/Llama-2-13b"},  # For distillation
...     "reference_model": {"name_or_path": "meta-llama/Llama-2-7b-instruct"},  # For DPO
...     "loader": {"dtype": "bf16", "param_dtype": "fp32"},
...     "sharding": {"axis_dims": (1, 1, 1, -1, 1)},
...     "mixture": {
...         "informs": [
...             {"type": "json", "data_files": "train/*.json", "format_fields": {"prompt": "text"}},
...             {"type": "parquet", "data_files": "valid/*.parquet"}
...         ],
...         "batch_size": 32,
...         "block_mixture": True,  # Use deterministic block mixing
...         "mixture_weights": {"train": 0.8, "valid": 0.2},
...         "pack_tokens": True,  # Enable token packing
...         "pack_seq_length": 2048,
...         "pack_eos_token_id": 2
...     },
...     "esurge": {"max_model_len": 4096, "enable_prefix_caching": True},
...     "eval": {"max_new_tokens": 1024, "temperature": 0.0}
... }
base_config: BaseCfg#
esurge: eSurgeCfg#
eval: EvalKwargs#
loader: LoaderCfg#
mixture: DataMixtureCfg#
model: ModelCfg#
platform: PlatformCfg#
quantization: QuantizationCfg#
reference_model: ModelCfg#
sharding: ShardingCfg#
teacher_model: ModelCfg#
trainer: TrainerConfig#
class easydel.infra.elarge_model.types.EasyDeLQuantizationCfg[source]#

Bases: TypedDict

Extended quantization config with pattern support for layer selection.

This config extends eformer’s QuantizationConfig with an additional pattern field for selecting which layers to quantize.

dtype#

The quantization type (NF4, INT8, TERNARY, BINARY).

Type

Literal['nf4', 'int8', 'ternary', 'binary']

block_size#

Block size for block-wise quantization.

Type

int

simulate#

If True, uses STE without actual bit packing (QAT mode).

Type

bool

use_kernel#

If True, uses optimized TPU/GPU kernels when available.

Type

bool

pattern#

Regex pattern for selecting layers to quantize. Default excludes embedding and norm layers.

Type

str

block_size: int#
dtype: Literal['nf4', 'int8', 'ternary', 'binary']#
pattern: str#
simulate: bool#
use_kernel: bool#
class easydel.infra.elarge_model.types.EvalKwargs[source]#

Bases: TypedDict

Evaluation keyword arguments for lm-evaluation-harness.

max_new_tokens#

Maximum number of tokens to generate (default: 2048)

Type

int

temperature#

Sampling temperature for generation (default: 0.0)

Type

float

top_p#

Top-p sampling parameter (default: 0.95)

Type

float

batch_size#

Evaluation batch size (default: engine-specific)

Type

int | None

use_tqdm#

Show progress bar during evaluation (default: True)

Type

bool

limit#

Maximum number of examples to evaluate per task

Type

int | float | None

cache_requests#

Whether to cache model outputs

Type

bool

check_integrity#

Whether to check task integrity

Type

bool

write_out#

Whether to write outputs to file

Type

bool

log_samples#

Whether to log individual samples

Type

bool

system_instruction#

System instruction for chat models

Type

str | None

apply_chat_template#

Whether to apply chat template

Type

bool

fewshot_as_multiturn#

Use fewshot examples as multi-turn conversation

Type

bool

gen_kwargs#

Additional generation kwargs

Type

dict[str, Any] | None

predict_only#

Only run predictions without scoring

Type

bool

random_seed#

Random seed for reproducibility

Type

int | None

numpy_random_seed#

NumPy random seed

Type

int | None

torch_random_seed#

PyTorch random seed

Type

int | None

fewshot_random_seed#

Random seed for fewshot sampling

Type

int | None

apply_chat_template: bool#
batch_size: int | None#
cache_requests: bool#
check_integrity: bool#
fewshot_as_multiturn: bool#
fewshot_random_seed: int | None#
gen_kwargs: dict[str, Any] | None#
limit: int | float | None#
log_samples: bool#
max_new_tokens: int#
numpy_random_seed: int | None#
predict_only: bool#
random_seed: int | None#
system_instruction: str | None#
temperature: float#
top_p: float#
torch_random_seed: int | None#
use_tqdm: bool#
write_out: bool#
class easydel.infra.elarge_model.types.LoaderCfg[source]#

Bases: TypedDict

Model loading configuration.

device#

Device to load model on

Type

Any

dtype#

Computation data type (e.g., “bf16”, “fp16”, “fp32”)

Type

Union[str, numpy.dtype, type, Literal[‘fp8’, ‘bf16’, ‘fp16’, ‘fp32’]]

param_dtype#

Parameter storage data type

Type

Union[str, numpy.dtype, type, Literal[‘fp8’, ‘bf16’, ‘fp16’, ‘fp32’]]

precision#

JAX precision level for matmuls

Type

Union[str, jax._src.lax.lax.Precision, None, Literal[‘HIGH’, ‘DEFAULT’, ‘HIGHEST’]]

verbose#

Enable verbose loading output

Type

bool

from_torch#

Whether to convert from PyTorch checkpoint

Type

bool | None

device: Any#
dtype: Union[str, dtype, type, Literal['fp8', 'bf16', 'fp16', 'fp32']]#
from_torch: bool | None#
param_dtype: Union[str, dtype, type, Literal['fp8', 'bf16', 'fp16', 'fp32']]#
precision: Union[str, Precision, None, Literal['HIGH', 'DEFAULT', 'HIGHEST']]#
verbose: bool#
class easydel.infra.elarge_model.types.ModelCfg[source]#

Bases: TypedDict

Model configuration section.

name_or_path#

HuggingFace model ID or local path (required)

Type

str

tokenizer#

Custom tokenizer path, defaults to name_or_path

Type

str

task#

Task type for auto-detection override

Type

Union[easydel.infra.factory.TaskType, str, Literal[‘causal-language-model’, ‘vision-language-model’, ‘diffusion-language-model’, ‘image-text-to-text’, ‘base-module’, ‘vision-module’, ‘sequence-to-sequence’, ‘speech-sequence-to-sequence’, ‘zero-shot-image-classification’, ‘sequence-classification’, ‘audio-classification’, ‘image-classification’, ‘auto-bind’]]

extra_kwargs#

Additional model loading arguments

Type

dict[str, Any]

extra_kwargs: dict[str, Any]#
name_or_path: str#
task: Union[TaskType, str, Literal['causal-language-model', 'vision-language-model', 'diffusion-language-model', 'image-text-to-text', 'base-module', 'vision-module', 'sequence-to-sequence', 'speech-sequence-to-sequence', 'zero-shot-image-classification', 'sequence-classification', 'audio-classification', 'image-classification', 'auto-bind']]#
tokenizer: str#
class easydel.infra.elarge_model.types.OperationConfigsDict[source]#

Bases: TypedDict

Configuration dictionary for ejkernel operation overrides.

Maps operation implementation names to their corresponding config objects. Keys must match the names registered in OperationRegistry (via get_impl_name()). When a config is provided for an operation, it overrides ejkernel’s autotune. When None or not set, ejkernel will use its default autotune behavior.

flash_attn2#

Config for flash attention 2 implementation.

Type

NotRequired[‘BaseOperationConfig | None’]

ring#

Config for ring attention.

Type

NotRequired[‘BaseOperationConfig | None’]

blocksparse#

Config for block sparse attention.

Type

NotRequired[‘BaseOperationConfig | None’]

ragged_page_attention_v2#

Config for ragged page attention v2.

Type

NotRequired[‘BaseOperationConfig | None’]

ragged_page_attention_v3#

Config for ragged page attention v3.

Type

NotRequired[‘BaseOperationConfig | None’]

sdpa#

Config for scaled dot product attention (also registered as cudnn, cuda_flash_attn2).

Type

NotRequired[‘BaseOperationConfig | None’]

vanilla#

Config for vanilla attention.

Type

NotRequired[‘BaseOperationConfig | None’]

Example

>>> from easydel import FlashAttentionConfig, RingAttentionConfig
>>> operation_configs: OperationConfigsDict = {
...     "flash_attn2": FlashAttentionConfig(platform="triton"),
...     "ring": RingAttentionConfig(),
... }
blocksparse: NotRequired['BaseOperationConfig | None']#
flash_attn2: NotRequired['BaseOperationConfig | None']#
ragged_page_attention_v2: NotRequired['BaseOperationConfig | None']#
ragged_page_attention_v3: NotRequired['BaseOperationConfig | None']#
ring: NotRequired['BaseOperationConfig | None']#
sdpa: NotRequired['BaseOperationConfig | None']#
vanilla: NotRequired['BaseOperationConfig | None']#
class easydel.infra.elarge_model.types.PlatformCfg[source]#

Bases: TypedDict

Platform and backend configuration.

backend#

Computation backend (e.g., “jax”, “triton”)

Type

easydel.infra.etils.EasyDeLBackends | None

platform#

Hardware platform (e.g., “tpu”, “gpu”)

Type

easydel.infra.etils.EasyDeLPlatforms | None

backend: easydel.infra.etils.EasyDeLBackends | None#
platform: easydel.infra.etils.EasyDeLPlatforms | None#
class easydel.infra.elarge_model.types.QuantizationCfg[source]#

Bases: TypedDict

Quantization configuration for model compression.

Supports both KV cache quantization and model layer quantization using EasyDeLQuantizationConfig.

platform#

Target platform for quantization

Type

easydel.infra.etils.EasyDeLPlatforms | None

kv_cache#

KV cache quantization config

Type

easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None

model#

Model layer quantization config

Type

easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None

quantize_tensors#

Whether to quantize tensors during loading

Type

bool

kv_cache: easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None#
model: easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None#
platform: easydel.infra.etils.EasyDeLPlatforms | None#
quantize_tensors: bool#
class easydel.infra.elarge_model.types.ShardingCfg[source]#

Bases: TypedDict

Model sharding configuration for distributed training/inference.

axis_dims#

Sharding dimensions for each axis (e.g., (1, 1, 1, -1, 1))

Type

Sequence[int]

dcn_axis_dims#

Data center network axis dimensions

Type

Sequence[int]

axis_names#

Names for sharding axes (e.g., (“dp”, “fsdp”, “ep”, “tp”, “sp”))

Type

Sequence[str]

partition_axis#

Custom partition axis configuration

Type

eformer.escale.partition.manager.PartitionAxis | None

shard_fns#

Custom sharding functions

Type

Union[Mapping[tuple, Callable[[...], Any]], dict]

auto_shard_model#

Enable automatic model sharding

Type

bool

partition_rules#

Custom partition rules for layer names

Type

tuple[tuple[str, Any], ...]

use_ring_of_experts#

Whether to dispatch experts with ring topology

Type

bool

fsdp_is_ep_bound#

Fold FSDP axis into expert axis when building expert meshes

Type

bool

sp_is_ep_bound#

Fold sequence-parallel axis into expert axis for MoE

Type

bool

auto_shard_model: bool#
axis_dims: Sequence[int]#
axis_names: Sequence[str]#
dcn_axis_dims: Sequence[int]#
fsdp_is_ep_bound: bool#
partition_axis: eformer.escale.partition.manager.PartitionAxis | None#
partition_rules: tuple[tuple[str, Any], ...]#
shard_fns: Union[Mapping[tuple, Callable[[...], Any]], dict]#
sp_is_ep_bound: bool#
use_ring_of_experts: bool#
class easydel.infra.elarge_model.types.TextDatasetInformCfg[source]#

Bases: TypedDict

Text dataset information configuration.

type#

Dataset type (json, parquet, csv, etc.) or HuggingFace dataset ID

Type

Union[Literal[‘json’, ‘jsonl’, ‘parquet’, ‘csv’, ‘arrow’, ‘huggingface’, ‘tsv’, ‘txt’], str]

data_files#

Path(s) to data files (string, list, or glob pattern)

Type

str | list[str]

dataset_split_name#

Name of the dataset split (for HuggingFace datasets)

Type

str | None

split#

Dataset split to use (default: “train”)

Type

str

content_field#

Field name containing text content (default: “content”)

Type

str

additional_fields#

Additional fields to preserve from dataset

Type

list[str]

num_rows#

Optional limit on number of rows to load

Type

int | None

format_callback#

Optional function to transform dataset examples

Type

Optional[Callable[[dict], dict]]

format_fields#

Optional mapping for renaming fields {'old_name': 'new_name'}

Type

dict[str, str] | None

additional_fields: list[str]#
content_field: str#
data_files: str | list[str]#
dataset_split_name: str | None#
format_callback: Optional[Callable[[dict], dict]]#
format_fields: dict[str, str] | None#
num_rows: int | None#
split: str#
type: Union[Literal['json', 'jsonl', 'parquet', 'csv', 'arrow', 'huggingface', 'tsv', 'txt'], str]#
class easydel.infra.elarge_model.types.TokenizationCfg[source]#

Bases: TypedDict

Tokenization configuration for dataset preprocessing.

tokenizer#

HuggingFace tokenizer name/path (defaults to model’s tokenizer)

Type

str | None

max_length#

Maximum sequence length for tokenization (default: 2048)

Type

int

truncation#

Whether to truncate sequences exceeding max_length (default: True)

Type

bool

padding#

Padding strategy - “max_length”, “longest”, False (default: False)

Type

Union[bool, Literal['max_length', 'longest']]

add_special_tokens#

Whether to add special tokens like BOS/EOS (default: True)

Type

bool

return_attention_mask#

Whether to return attention masks (default: True)

Type

bool

text_field#

Field name containing text to tokenize (default: “text”)

Type

str

output_field#

Field name for tokenized output (default: “tokens”)

Type

str

num_proc#

Number of processes for parallel tokenization (default: None, auto)

Type

int | None

batched#

Whether to process examples in batches (default: True)

Type

bool

batch_size#

Batch size for batched processing (default: 1000)

Type

int

remove_columns#

Columns to remove after tokenization (default: None, auto-detect)

Type

list[str] | None

keep_in_memory#

Keep processed dataset in memory (default: False)

Type

bool

add_special_tokens: bool#
batch_size: int#
batched: bool#
keep_in_memory: bool#
max_length: int#
num_proc: int | None#
output_field: str#
padding: Union[bool, Literal['max_length', 'longest']]#
remove_columns: list[str] | None#
return_attention_mask: bool#
text_field: str#
tokenizer: str | None#
truncation: bool#
class easydel.infra.elarge_model.types.VisualDatasetInformCfg[source]#

Bases: TypedDict

Visual dataset information configuration.

type#

Dataset type (json, parquet, csv, etc.) or HuggingFace dataset ID

Type

Union[Literal[‘json’, ‘jsonl’, ‘parquet’, ‘csv’, ‘arrow’, ‘huggingface’, ‘tsv’, ‘txt’], str]

data_files#

Path(s) to data files (string, list, or glob pattern)

Type

str | list[str]

dataset_split_name#

Name of the dataset split (for HuggingFace datasets)

Type

str | None

split#

Dataset split to use (default: “train”)

Type

str

pixel_field#

Field name containing image data (default: “images”)

Type

str

content_field#

Optional field name containing text descriptions

Type

str | None

image_size#

Target image size as (width, height) tuple

Type

tuple[int, int] | None

num_rows#

Optional limit on number of rows to load

Type

int | None

format_callback#

Optional function to transform dataset examples

Type

Optional[Callable[[dict], dict]]

format_fields#

Optional mapping for renaming fields {'old_name': 'new_name'}

Type

dict[str, str] | None

content_field: str | None#
data_files: str | list[str]#
dataset_split_name: str | None#
format_callback: Optional[Callable[[dict], dict]]#
format_fields: dict[str, str] | None#
image_size: tuple[int, int] | None#
num_rows: int | None#
pixel_field: str#
split: str#
type: Union[Literal['json', 'jsonl', 'parquet', 'csv', 'arrow', 'huggingface', 'tsv', 'txt'], str]#
class easydel.infra.elarge_model.types.eSurgeCfg[source]#

Bases: TypedDict

eSurge inference engine configuration.

max_model_len#

Maximum sequence length for the model.

Type

NotRequired[int]

min_input_pad#

Minimum padding for input sequences (default: 16).

Type

NotRequired[int]

max_num_seqs#

Maximum number of concurrent sequences (default: 256).

Type

NotRequired[int]

max_num_batched_tokens#

Optional cap on total tokens per batch.

Type

NotRequired[int | None]

hbm_utilization#

HBM memory utilization ratio (default: 0.85).

Type

NotRequired[float]

page_size#

Page size for paged attention (default: 128).

Type

NotRequired[int]

use_aot_forward#

Use ahead-of-time compiled forward pass.

Type

NotRequired[bool]

enable_prefix_caching#

Enable prefix caching optimization.

Type

NotRequired[bool]

auto_shard_model#

Enable automatic model sharding.

Type

NotRequired[bool]

sharding_axis_dims#

Sharding axis dimensions (default: (1, 1, 1, -1, 1)).

Type

NotRequired[tp.Sequence[int]]

compile_runner#

Compile the runner helpers on startup.

Type

NotRequired[bool]

runner_verbose#

Enable verbose runner logs (alias: verbose).

Type

NotRequired[bool]

verbose#

Legacy alias for runner_verbose.

Type

NotRequired[bool]

overlap_execution#

Enable overlapping scheduler and execution (experimental).

Type

NotRequired[bool]

sampler_metrics#

Enable sampler-side metrics collection.

Type

NotRequired[bool]

esurge_name#

Optional engine display name.

Type

NotRequired[str | None]

reserve_tokens#

Tokens reserved from the context budget.

Type

NotRequired[int | None]

auto_truncate_prompt#

Allow automatic prompt truncation.

Type

NotRequired[bool]

auto_cap_new_tokens#

Cap requested new tokens to fit context.

Type

NotRequired[bool]

strict_context#

Raise on context violations instead of auto-fixing.

Type

NotRequired[bool]

truncate_mode#

Truncation strategy (“left”, “right”, “middle”).

Type

NotRequired[tp.Literal['left', 'right', 'middle']]

prefer_preserve_prompt#

Prefer preserving prompt before truncating it.

Type

NotRequired[bool]

decode_truncated_prompt#

Re-decode truncated prompts for text fidelity.

Type

NotRequired[bool]

destroy_pages_on_pause#

Destroy cache pages when pausing the engine.

Type

NotRequired[bool]

detokenizer_max_states#

Maximum states kept in the detokenizer worker.

Type

NotRequired[int]

tokenizer_endpoint#

External tokenizer worker endpoint.

Type

NotRequired[str | None]

detokenizer_endpoint#

External detokenizer worker endpoint.

Type

NotRequired[str | None]

sampling_params_callback#

Optional hook to mutate SamplingParams per request.

Type

NotRequired[tp.Callable[[SamplingParams, dict[str, tp.Any]], SamplingParams | None] | None]

extra_eos_token_ids#

Additional EOS token IDs applied globally.

Type

NotRequired[list[int] | None]

silent_mode#

Suppress informational eSurge engine logs.

Type

NotRequired[bool]

auto_cap_new_tokens: NotRequired[bool]#
auto_shard_model: NotRequired[bool]#
auto_truncate_prompt: NotRequired[bool]#
compile_runner: NotRequired[bool]#
decode_truncated_prompt: NotRequired[bool]#
destroy_pages_on_pause: NotRequired[bool]#
detokenizer_endpoint: NotRequired[str | None]#
detokenizer_max_states: NotRequired[int]#
enable_prefix_caching: NotRequired[bool]#
esurge_name: NotRequired[str | None]#
extra_eos_token_ids: NotRequired[list[int] | None]#
hbm_utilization: NotRequired[float]#
max_model_len: NotRequired[int]#
max_num_batched_tokens: NotRequired[int | None]#
max_num_seqs: NotRequired[int]#
min_input_pad: NotRequired[int]#
overlap_execution: NotRequired[bool]#
page_size: NotRequired[int]#
prefer_preserve_prompt: NotRequired[bool]#
reserve_tokens: NotRequired[int | None]#
runner_verbose: NotRequired[bool]#
sampler_metrics: NotRequired[bool]#
sampling_params_callback: NotRequired[tp.Callable[[SamplingParams, dict[str, tp.Any]], SamplingParams | None] | None]#
sharding_axis_dims: NotRequired[tp.Sequence[int]]#
silent_mode: NotRequired[bool]#
strict_context: NotRequired[bool]#
tokenizer_endpoint: NotRequired[str | None]#
truncate_mode: NotRequired[tp.Literal['left', 'right', 'middle']]#
use_aot_forward: NotRequired[bool]#
verbose: NotRequired[bool]#