easydel.infra.elarge_model.types

easydel.infra.elarge_model.types#

Type definitions for ELM configuration system.

This module defines the TypedDict structures used throughout the ELM configuration system, providing type safety and documentation for configuration schemas.

class easydel.infra.elarge_model.types.BaseCfg[source]#

Bases: TypedDict

Base configuration values container.

values#

Dictionary of base configuration values that get passed to the model’s config during initialization

Type: easydel.infra.base_config.EasyDeLBaseConfigDict | dict[str, Any]

operation_configs#

ejkernel operation config overrides. Maps implementation names (e.g., “flash_attn2”, “ring”) to their config objects. When set, overrides ejkernel autotune.

Type: easydel.infra.elarge_model.types.OperationConfigsDict | None

operation_configs: easydel.infra.elarge_model.types.OperationConfigsDict | None#

class easydel.infra.elarge_model.types.DataMixtureCfg[source]#

Bases: TypedDict

Data mixture configuration for training/evaluation datasets.

informs#

List of dataset configurations (text or visual)

Type: list[easydel.infra.elarge_model.types.TextDatasetInformCfg | easydel.infra.elarge_model.types.VisualDatasetInformCfg]

cache_dir#

Directory for caching datasets (default: ~/.cache/easydel)

Type: str

streaming#

Whether to use streaming mode for large datasets (default: True)

Type: bool

text_target_field#

Target field name for text in unified dataset (default: “text”)

Type: str

image_target_field#

Target field name for images in unified dataset (default: “image”)

Type: str

batch_size#

Batch size for data loading (default: 1)

Type: int

shuffle_buffer_size#

Buffer size for shuffling in streaming mode (default: None)

Type: int | None

seed#

Random seed for shuffling and sampling (default: 42)

Type: int | None

# Token packing configuration

pack_tokens#

Enable pre-tokenized sequence packing (default: False)

Type: bool

tokens_field_name#

Field name containing token IDs (default: “tokens”)

Type: str

pack_seq_length#

Target sequence length for packing (default: None)

Type: int | None

pack_eos_token_id#

EOS token ID for padding/separation (default: 0)

Type: int

pack_shuffle#

Shuffle packed sequences (default: True)

Type: bool

pack_shuffle_buffer_factor#

Buffer size multiplier for shuffle (default: 16)

Type: int

dask_storage_options#

Storage options for dask/remote files (default: None)

Type: dict | None

# On-the-fly tokenization and packing

pack_on_the_fly#

Enable on-the-fly tokenization and packing (default: False)

Type: bool

tokenize_callback#

Function to tokenize examples, returns token IDs (default: None)

Type: Optional[Callable[[dict], list[int]]]

# Block-deterministic mixture configuration

block_mixture#

Use deterministic block mixing instead of standard interleave (default: True)

Type: bool

mixture_block_size#

Number of examples per block (default: 2048)

Type: int

stop_strategy#

Strategy to apply when a dataset is exhausted - “restart” or “first_exhausted” (default: “restart”)

Type: str

mixture_weights#

Per-dataset weights as dict mapping dataset identifier to weight (default: None)

Type: dict[str, float] | None

# Tokenization configuration

tokenization#

Configuration for tokenizing the dataset (default: None)

Type: easydel.infra.elarge_model.types.TokenizationCfg | None

# Save configuration

save#

Configuration for saving the processed dataset (default: None)

Type: easydel.infra.elarge_model.types.DatasetSaveCfg | None

# ShardedDataSource configuration (new data pipeline)

use_sharded_source#

Use new ShardedDataSource architecture (default: False). When True, builds a ShardedDataSource instead of HF Dataset for more efficient streaming and lazy transforms.

Type: bool

# Legacy/deprecated attributes (kept for compatibility)

use_fast_loader#

Enable fast data loading with fsspec (deprecated)

Type: bool

num_workers#

Number of parallel workers for data loading (deprecated)

Type: int

prefetch_size#

Number of batches to prefetch (deprecated)

Type: int

enable_caching#

Enable dataset caching for faster reloads (deprecated)

Type: bool

batch_size: int#

block_mixture: bool#

cache_dir: str#

dask_storage_options: dict | None#

enable_caching: bool#

image_target_field: str#

informs: list[easydel.infra.elarge_model.types.TextDatasetInformCfg | easydel.infra.elarge_model.types.VisualDatasetInformCfg]#

mixture_block_size: int#

mixture_weights: dict[str, float] | None#

num_workers: int#

pack_eos_token_id: int#

pack_on_the_fly: bool#

pack_seq_length: int | None#

pack_shuffle: bool#

pack_shuffle_buffer_factor: int#

pack_tokens: bool#

prefetch_size: int#

save: easydel.infra.elarge_model.types.DatasetSaveCfg | None#

seed: int | None#

shuffle_buffer_size: int | None#

stop_strategy: str#

streaming: bool#

text_target_field: str#

tokenization: easydel.infra.elarge_model.types.TokenizationCfg | None#

tokenize_callback: Optional[Callable[[dict], list[int]]]#

tokens_field_name: str#

use_fast_loader: bool#

use_sharded_source: bool#

class easydel.infra.elarge_model.types.DatasetSaveCfg[source]#

Bases: TypedDict

Configuration for saving processed/tokenized datasets.

output_path#

Path to save the dataset (required)

Type: str

format#

Output format - “parquet”, “arrow”, “json”, “jsonl” (default: “parquet”)

Type: Literal[‘parquet’, ‘arrow’, ‘json’, ‘jsonl’]

num_shards#

Number of shards to split the dataset into (default: None, auto)

Type: int | None

compression#

Compression algorithm - “snappy”, “gzip”, “zstd”, None (default: “snappy”)

Type: Optional[Literal[‘snappy’, ‘gzip’, ‘zstd’]]

max_shard_size#

Maximum shard size in bytes or string like “500MB” (default: “500MB”)

Type: str | int

overwrite#

Whether to overwrite existing files (default: False)

Type: bool

push_to_hub#

Whether to push to HuggingFace Hub (default: False)

Type: bool

hub_repo_id#

HuggingFace Hub repository ID (required if push_to_hub=True)

Type: str | None

hub_private#

Whether to make the Hub repository private (default: False)

Type: bool

hub_token#

HuggingFace token for authentication (default: None, use env)

Type: str | None

compression: Optional[Literal['snappy', 'gzip', 'zstd']]#

format: Literal['parquet', 'arrow', 'json', 'jsonl']#

hub_private: bool#

hub_repo_id: str | None#

hub_token: str | None#

max_shard_size: str | int#

num_shards: int | None#

output_path: str#

overwrite: bool#

push_to_hub: bool#

class easydel.infra.elarge_model.types.ELMConfig[source]#

Bases: TypedDict

Complete ELM configuration structure.

This is the top-level configuration type that combines all configuration sections for model loading, sharding, quantization, inference, training, and data.

model#

Model configuration (required)

Type: easydel.infra.elarge_model.types.ModelCfg

teacher_model#

Teacher model configuration for distillation training

Type: easydel.infra.elarge_model.types.ModelCfg

reference_model#

Reference model configuration for preference optimization (DPO, etc.)

Type: easydel.infra.elarge_model.types.ModelCfg

loader#

Model loading configuration

Type: easydel.infra.elarge_model.types.LoaderCfg

sharding#

Distributed sharding configuration

Type: easydel.infra.elarge_model.types.ShardingCfg

platform#

Platform and backend configuration

Type: easydel.infra.elarge_model.types.PlatformCfg

quantization#

Quantization configuration

Type: easydel.infra.elarge_model.types.QuantizationCfg

base_config#

Base model configuration values

Type: easydel.infra.elarge_model.types.BaseCfg

mixture#

Data mixture configuration for training/evaluation datasets

Type: easydel.infra.elarge_model.types.DataMixtureCfg

esurge#

eSurge inference engine configuration

Type: easydel.infra.elarge_model.types.eSurgeCfg

trainer#

Training configuration

Type: easydel.infra.elarge_model.trainer_types.TrainerConfig

eval#

Evaluation configuration for lm-evaluation-harness

Type: easydel.infra.elarge_model.types.EvalKwargs

Example

>>> # Basic configuration
>>> config: ELMConfig = {
...     "model": {"name_or_path": "meta-llama/Llama-2-7b"},
...     "loader": {"dtype": "bf16"},
...     "sharding": {"axis_dims": (1, 1, 1, -1, 1)},
...     "mixture": {
...         "informs": [
...             {"type": "json", "data_files": "train.json", "content_field": "text"},
...             {"type": "parquet", "data_files": "valid/*.parquet", "content_field": "content"}
...         ],
...         "batch_size": 32
...     }
... }
>>>
>>> # Advanced configuration with distillation, DPO, and token packing
>>> config: ELMConfig = {
...     "model": {"name_or_path": "meta-llama/Llama-2-7b"},
...     "teacher_model": {"name_or_path": "meta-llama/Llama-2-13b"},  # For distillation
...     "reference_model": {"name_or_path": "meta-llama/Llama-2-7b-instruct"},  # For DPO
...     "loader": {"dtype": "bf16", "param_dtype": "fp32"},
...     "sharding": {"axis_dims": (1, 1, 1, -1, 1)},
...     "mixture": {
...         "informs": [
...             {"type": "json", "data_files": "train/*.json", "format_fields": {"prompt": "text"}},
...             {"type": "parquet", "data_files": "valid/*.parquet"}
...         ],
...         "batch_size": 32,
...         "block_mixture": True,  # Use deterministic block mixing
...         "mixture_weights": {"train": 0.8, "valid": 0.2},
...         "pack_tokens": True,  # Enable token packing
...         "pack_seq_length": 2048,
...         "pack_eos_token_id": 2
...     },
...     "esurge": {"max_model_len": 4096, "enable_prefix_caching": True},
...     "eval": {"max_new_tokens": 1024, "temperature": 0.0}
... }

base_config: BaseCfg#

esurge: eSurgeCfg#

eval: EvalKwargs#

loader: LoaderCfg#

mixture: DataMixtureCfg#

model: ModelCfg#

platform: PlatformCfg#

quantization: QuantizationCfg#

reference_model: ModelCfg#

sharding: ShardingCfg#

teacher_model: ModelCfg#

trainer: TrainerConfig#

class easydel.infra.elarge_model.types.EasyDeLQuantizationCfg[source]#

Bases: TypedDict

Extended quantization config with pattern support for layer selection.

This config extends eformer’s QuantizationConfig with an additional pattern field for selecting which layers to quantize.

dtype#

The quantization type (NF4, INT8, TERNARY, BINARY).

Type: Literal[‘nf4’, ‘int8’, ‘ternary’, ‘binary’]

block_size#

Block size for block-wise quantization.

Type: int

simulate#

If True, uses STE without actual bit packing (QAT mode).

Type: bool

use_kernel#

If True, uses optimized TPU/GPU kernels when available.

Type: bool

pattern#

Regex pattern for selecting layers to quantize. Default excludes embedding and norm layers.

Type: str

block_size: int#

dtype: Literal['nf4', 'int8', 'ternary', 'binary']#

pattern: str#

simulate: bool#

use_kernel: bool#

class easydel.infra.elarge_model.types.EvalKwargs[source]#

Bases: TypedDict

Evaluation keyword arguments for lm-evaluation-harness.

max_new_tokens#

Maximum number of tokens to generate (default: 2048)

Type: int

temperature#

Sampling temperature for generation (default: 0.0)

Type: float

top_p#

Top-p sampling parameter (default: 0.95)

Type: float

batch_size#

Evaluation batch size (default: engine-specific)

Type: int | None

use_tqdm#

Show progress bar during evaluation (default: True)

Type: bool

limit#

Maximum number of examples to evaluate per task

Type: int | float | None

cache_requests#

Whether to cache model outputs

Type: bool

check_integrity#

Whether to check task integrity

Type: bool

write_out#

Whether to write outputs to file

Type: bool

log_samples#

Whether to log individual samples

Type: bool

system_instruction#

System instruction for chat models

Type: str | None

apply_chat_template#

Whether to apply chat template

Type: bool

fewshot_as_multiturn#

Use fewshot examples as multi-turn conversation

Type: bool

gen_kwargs#

Additional generation kwargs

Type: dict[str, Any] | None

predict_only#

Only run predictions without scoring

Type: bool

random_seed#

Random seed for reproducibility

Type: int | None

numpy_random_seed#

NumPy random seed

Type: int | None

torch_random_seed#

PyTorch random seed

Type: int | None

fewshot_random_seed#

Random seed for fewshot sampling

Type: int | None

apply_chat_template: bool#

batch_size: int | None#

cache_requests: bool#

check_integrity: bool#

fewshot_as_multiturn: bool#

fewshot_random_seed: int | None#

gen_kwargs: dict[str, Any] | None#

limit: int | float | None#

log_samples: bool#

max_new_tokens: int#

numpy_random_seed: int | None#

predict_only: bool#

random_seed: int | None#

system_instruction: str | None#

temperature: float#

top_p: float#

torch_random_seed: int | None#

use_tqdm: bool#

write_out: bool#

class easydel.infra.elarge_model.types.LoaderCfg[source]#

Bases: TypedDict

Model loading configuration.

device#

Device to load model on

Type: Any

dtype#

Computation data type (e.g., “bf16”, “fp16”, “fp32”)

Type: Union[str, numpy.dtype, type, Literal[‘fp8’, ‘bf16’, ‘fp16’, ‘fp32’]]

param_dtype#

Parameter storage data type

Type: Union[str, numpy.dtype, type, Literal[‘fp8’, ‘bf16’, ‘fp16’, ‘fp32’]]

precision#

JAX precision level for matmuls

Type: Union[str, jax._src.lax.lax.Precision, None, Literal[‘HIGH’, ‘DEFAULT’, ‘HIGHEST’]]

verbose#

Enable verbose loading output

Type: bool

from_torch#

Whether to convert from PyTorch checkpoint

Type: bool | None

device: Any#

dtype: Union[str, dtype, type, Literal['fp8', 'bf16', 'fp16', 'fp32']]#

from_torch: bool | None#

param_dtype: Union[str, dtype, type, Literal['fp8', 'bf16', 'fp16', 'fp32']]#

precision: Union[str, Precision, None, Literal['HIGH', 'DEFAULT', 'HIGHEST']]#

verbose: bool#

class easydel.infra.elarge_model.types.ModelCfg[source]#

Bases: TypedDict

Model configuration section.

name_or_path#

HuggingFace model ID or local path (required)

Type: str

tokenizer#

Custom tokenizer path, defaults to name_or_path

Type: str

task#

Task type for auto-detection override

Type: Union[easydel.infra.factory.TaskType, str, Literal[‘causal-language-model’, ‘vision-language-model’, ‘diffusion-language-model’, ‘image-text-to-text’, ‘base-module’, ‘vision-module’, ‘sequence-to-sequence’, ‘speech-sequence-to-sequence’, ‘zero-shot-image-classification’, ‘sequence-classification’, ‘audio-classification’, ‘image-classification’, ‘auto-bind’]]

extra_kwargs#

Additional model loading arguments

Type: dict[str, Any]

extra_kwargs: dict[str, Any]#

name_or_path: str#

task: Union[TaskType, str, Literal['causal-language-model', 'vision-language-model', 'diffusion-language-model', 'image-text-to-text', 'base-module', 'vision-module', 'sequence-to-sequence', 'speech-sequence-to-sequence', 'zero-shot-image-classification', 'sequence-classification', 'audio-classification', 'image-classification', 'auto-bind']]#

tokenizer: str#

class easydel.infra.elarge_model.types.OperationConfigsDict[source]#

Bases: TypedDict

Configuration dictionary for ejkernel operation overrides.

Maps operation implementation names to their corresponding config objects. Keys must match the names registered in OperationRegistry (via get_impl_name()). When a config is provided for an operation, it overrides ejkernel’s autotune. When None or not set, ejkernel will use its default autotune behavior.

flash_attn2#

Config for flash attention 2 implementation.

Type: NotRequired[‘BaseOperationConfig | None’]

ring#

Config for ring attention.

Type: NotRequired[‘BaseOperationConfig | None’]

blocksparse#

Config for block sparse attention.

Type: NotRequired[‘BaseOperationConfig | None’]

ragged_page_attention_v2#

Config for ragged page attention v2.

Type: NotRequired[‘BaseOperationConfig | None’]

ragged_page_attention_v3#

Config for ragged page attention v3.

Type: NotRequired[‘BaseOperationConfig | None’]

sdpa#

Config for scaled dot product attention (also registered as cudnn, cuda_flash_attn2).

Type: NotRequired[‘BaseOperationConfig | None’]

vanilla#

Config for vanilla attention.

Type: NotRequired[‘BaseOperationConfig | None’]

Example

>>> from easydel import FlashAttentionConfig, RingAttentionConfig
>>> operation_configs: OperationConfigsDict = {
...     "flash_attn2": FlashAttentionConfig(platform="triton"),
...     "ring": RingAttentionConfig(),
... }

blocksparse: NotRequired['BaseOperationConfig | None']#

flash_attn2: NotRequired['BaseOperationConfig | None']#

ragged_page_attention_v2: NotRequired['BaseOperationConfig | None']#

ragged_page_attention_v3: NotRequired['BaseOperationConfig | None']#

ring: NotRequired['BaseOperationConfig | None']#

sdpa: NotRequired['BaseOperationConfig | None']#

vanilla: NotRequired['BaseOperationConfig | None']#

class easydel.infra.elarge_model.types.PlatformCfg[source]#

Bases: TypedDict

Platform and backend configuration.

backend#

Computation backend (e.g., “jax”, “triton”)

Type: easydel.infra.etils.EasyDeLBackends | None

platform#

Hardware platform (e.g., “tpu”, “gpu”)

Type: easydel.infra.etils.EasyDeLPlatforms | None

backend: easydel.infra.etils.EasyDeLBackends | None#

platform: easydel.infra.etils.EasyDeLPlatforms | None#

class easydel.infra.elarge_model.types.QuantizationCfg[source]#

Bases: TypedDict

Quantization configuration for model compression.

Supports both KV cache quantization and model layer quantization using EasyDeLQuantizationConfig.

platform#

Target platform for quantization

Type: easydel.infra.etils.EasyDeLPlatforms | None

kv_cache#

KV cache quantization config

Type: easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None

model#

Model layer quantization config

Type: easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None

quantize_tensors#

Whether to quantize tensors during loading

Type: bool

kv_cache: easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None#

model: easydel.layers.quantization.quantizers.EasyDeLQuantizationConfig | easydel.infra.elarge_model.types.EasyDeLQuantizationCfg | None#

platform: easydel.infra.etils.EasyDeLPlatforms | None#

quantize_tensors: bool#

class easydel.infra.elarge_model.types.ShardingCfg[source]#

Bases: TypedDict

Model sharding configuration for distributed training/inference.

axis_dims#

Sharding dimensions for each axis (e.g., (1, 1, 1, -1, 1))

Type: Sequence[int]

dcn_axis_dims#

Data center network axis dimensions

Type: Sequence[int]

axis_names#

Names for sharding axes (e.g., (“dp”, “fsdp”, “ep”, “tp”, “sp”))

Type: Sequence[str]

partition_axis#

Custom partition axis configuration

Type: eformer.escale.partition.manager.PartitionAxis | None

shard_fns#

Custom sharding functions

Type: Union[Mapping[tuple, Callable[[…], Any]], dict]

auto_shard_model#

Enable automatic model sharding

Type: bool

partition_rules#

Custom partition rules for layer names

Type: tuple[tuple[str, Any], …]

use_ring_of_experts#

Whether to dispatch experts with ring topology

Type: bool

fsdp_is_ep_bound#

Fold FSDP axis into expert axis when building expert meshes

Type: bool

sp_is_ep_bound#

Fold sequence-parallel axis into expert axis for MoE

Type: bool

auto_shard_model: bool#

axis_dims: Sequence[int]#

axis_names: Sequence[str]#

dcn_axis_dims: Sequence[int]#

fsdp_is_ep_bound: bool#

partition_axis: eformer.escale.partition.manager.PartitionAxis | None#

partition_rules: tuple[tuple[str, Any], ...]#

shard_fns: Union[Mapping[tuple, Callable[[...], Any]], dict]#

sp_is_ep_bound: bool#

use_ring_of_experts: bool#

class easydel.infra.elarge_model.types.TextDatasetInformCfg[source]#

Bases: TypedDict

Text dataset information configuration.

type#

Dataset type (json, parquet, csv, etc.) or HuggingFace dataset ID

Type: Union[Literal[‘json’, ‘jsonl’, ‘parquet’, ‘csv’, ‘arrow’, ‘huggingface’, ‘tsv’, ‘txt’], str]

data_files#

Path(s) to data files (string, list, or glob pattern)

Type: str | list[str]

dataset_split_name#

Name of the dataset split (for HuggingFace datasets)

Type: str | None

split#

Dataset split to use (default: “train”)

Type: str

content_field#

Field name containing text content (default: “content”)

Type: str

additional_fields#

Additional fields to preserve from dataset

Type: list[str]

num_rows#

Optional limit on number of rows to load

Type: int | None

format_callback#

Optional function to transform dataset examples

Type: Optional[Callable[[dict], dict]]

format_fields#

Optional mapping for renaming fields {‘old_name’: ‘new_name’}

Type: dict[str, str] | None

additional_fields: list[str]#

content_field: str#

data_files: str | list[str]#

dataset_split_name: str | None#

format_callback: Optional[Callable[[dict], dict]]#

format_fields: dict[str, str] | None#

num_rows: int | None#

split: str#

type: Union[Literal['json', 'jsonl', 'parquet', 'csv', 'arrow', 'huggingface', 'tsv', 'txt'], str]#

class easydel.infra.elarge_model.types.TokenizationCfg[source]#

Bases: TypedDict

Tokenization configuration for dataset preprocessing.

tokenizer#

HuggingFace tokenizer name/path (defaults to model’s tokenizer)

Type: str | None

max_length#

Maximum sequence length for tokenization (default: 2048)

Type: int

truncation#

Whether to truncate sequences exceeding max_length (default: True)

Type: bool

padding#

Padding strategy - “max_length”, “longest”, False (default: False)

Type: Union[bool, Literal[‘max_length’, ‘longest’]]

add_special_tokens#

Whether to add special tokens like BOS/EOS (default: True)

Type: bool

return_attention_mask#

Whether to return attention masks (default: True)

Type: bool

text_field#

Field name containing text to tokenize (default: “text”)

Type: str

output_field#

Field name for tokenized output (default: “tokens”)

Type: str

num_proc#

Number of processes for parallel tokenization (default: None, auto)

Type: int | None

batched#

Whether to process examples in batches (default: True)

Type: bool

batch_size#

Batch size for batched processing (default: 1000)

Type: int

remove_columns#

Columns to remove after tokenization (default: None, auto-detect)

Type: list[str] | None

keep_in_memory#

Keep processed dataset in memory (default: False)

Type: bool

add_special_tokens: bool#

batch_size: int#

batched: bool#

keep_in_memory: bool#

max_length: int#

num_proc: int | None#

output_field: str#

padding: Union[bool, Literal['max_length', 'longest']]#

remove_columns: list[str] | None#

return_attention_mask: bool#

text_field: str#

tokenizer: str | None#

truncation: bool#

class easydel.infra.elarge_model.types.VisualDatasetInformCfg[source]#

Bases: TypedDict

Visual dataset information configuration.

type#

Dataset type (json, parquet, csv, etc.) or HuggingFace dataset ID

Type: Union[Literal[‘json’, ‘jsonl’, ‘parquet’, ‘csv’, ‘arrow’, ‘huggingface’, ‘tsv’, ‘txt’], str]

data_files#

Path(s) to data files (string, list, or glob pattern)

Type: str | list[str]

dataset_split_name#

Name of the dataset split (for HuggingFace datasets)

Type: str | None

split#

Dataset split to use (default: “train”)

Type: str

pixel_field#

Field name containing image data (default: “images”)

Type: str

content_field#

Optional field name containing text descriptions

Type: str | None

image_size#

Target image size as (width, height) tuple

Type: tuple[int, int] | None

num_rows#

Optional limit on number of rows to load

Type: int | None

format_callback#

Optional function to transform dataset examples

Type: Optional[Callable[[dict], dict]]

format_fields#

Optional mapping for renaming fields {‘old_name’: ‘new_name’}

Type: dict[str, str] | None

content_field: str | None#

data_files: str | list[str]#

dataset_split_name: str | None#

format_callback: Optional[Callable[[dict], dict]]#

format_fields: dict[str, str] | None#

image_size: tuple[int, int] | None#

num_rows: int | None#

pixel_field: str#

split: str#

type: Union[Literal['json', 'jsonl', 'parquet', 'csv', 'arrow', 'huggingface', 'tsv', 'txt'], str]#

class easydel.infra.elarge_model.types.eSurgeCfg[source]#

Bases: TypedDict

eSurge inference engine configuration.

max_model_len#

Maximum sequence length for the model.

Type: NotRequired[int]

min_input_pad#

Minimum padding for input sequences (default: 16).

Type: NotRequired[int]

max_num_seqs#

Maximum number of concurrent sequences (default: 256).

Type: NotRequired[int]

max_num_batched_tokens#

Optional cap on total tokens per batch.

Type: NotRequired[int | None]

hbm_utilization#

HBM memory utilization ratio (default: 0.85).

Type: NotRequired[float]

page_size#

Page size for paged attention (default: 128).

Type: NotRequired[int]

use_aot_forward#

Use ahead-of-time compiled forward pass.

Type: NotRequired[bool]

enable_prefix_caching#

Enable prefix caching optimization.

Type: NotRequired[bool]

auto_shard_model#

Enable automatic model sharding.

Type: NotRequired[bool]

sharding_axis_dims#

Sharding axis dimensions (default: (1, 1, 1, -1, 1)).

Type: NotRequired[tp.Sequence[int]]

compile_runner#

Compile the runner helpers on startup.

Type: NotRequired[bool]

runner_verbose#

Enable verbose runner logs (alias: verbose).

Type: NotRequired[bool]

verbose#

Legacy alias for runner_verbose.

Type: NotRequired[bool]

overlap_execution#

Enable overlapping scheduler and execution (experimental).

Type: NotRequired[bool]

sampler_metrics#

Enable sampler-side metrics collection.

Type: NotRequired[bool]

esurge_name#

Optional engine display name.

Type: NotRequired[str | None]

reserve_tokens#

Tokens reserved from the context budget.

Type: NotRequired[int | None]

auto_truncate_prompt#

Allow automatic prompt truncation.

Type: NotRequired[bool]

auto_cap_new_tokens#

Cap requested new tokens to fit context.

Type: NotRequired[bool]

strict_context#

Raise on context violations instead of auto-fixing.

Type: NotRequired[bool]

truncate_mode#

Truncation strategy (“left”, “right”, “middle”).

Type: NotRequired[tp.Literal[‘left’, ‘right’, ‘middle’]]

prefer_preserve_prompt#

Prefer preserving prompt before truncating it.

Type: NotRequired[bool]

decode_truncated_prompt#

Re-decode truncated prompts for text fidelity.

Type: NotRequired[bool]

destroy_pages_on_pause#

Destroy cache pages when pausing the engine.

Type: NotRequired[bool]

detokenizer_max_states#

Maximum states kept in the detokenizer worker.

Type: NotRequired[int]

tokenizer_endpoint#

External tokenizer worker endpoint.

Type: NotRequired[str | None]

detokenizer_endpoint#

External detokenizer worker endpoint.

Type: NotRequired[str | None]

sampling_params_callback#

Optional hook to mutate SamplingParams per request.

Type: NotRequired[tp.Callable[[SamplingParams, dict[str, tp.Any]], SamplingParams | None] | None]

extra_eos_token_ids#

Additional EOS token IDs applied globally.

Type: NotRequired[list[int] | None]

silent_mode#

Suppress informational eSurge engine logs.

Type: NotRequired[bool]

auto_cap_new_tokens: NotRequired[bool]#

auto_shard_model: NotRequired[bool]#

auto_truncate_prompt: NotRequired[bool]#

compile_runner: NotRequired[bool]#

decode_truncated_prompt: NotRequired[bool]#

destroy_pages_on_pause: NotRequired[bool]#

detokenizer_endpoint: NotRequired[str | None]#

detokenizer_max_states: NotRequired[int]#

enable_prefix_caching: NotRequired[bool]#

esurge_name: NotRequired[str | None]#

extra_eos_token_ids: NotRequired[list[int] | None]#

hbm_utilization: NotRequired[float]#

max_model_len: NotRequired[int]#

max_num_batched_tokens: NotRequired[int | None]#

max_num_seqs: NotRequired[int]#

min_input_pad: NotRequired[int]#

overlap_execution: NotRequired[bool]#

page_size: NotRequired[int]#

prefer_preserve_prompt: NotRequired[bool]#

reserve_tokens: NotRequired[int | None]#

runner_verbose: NotRequired[bool]#

sampler_metrics: NotRequired[bool]#

sampling_params_callback: NotRequired[tp.Callable[[SamplingParams, dict[str, tp.Any]], SamplingParams | None] | None]#

sharding_axis_dims: NotRequired[tp.Sequence[int]]#

silent_mode: NotRequired[bool]#

strict_context: NotRequired[bool]#

tokenizer_endpoint: NotRequired[str | None]#

truncate_mode: NotRequired[tp.Literal['left', 'right', 'middle']]#

use_aot_forward: NotRequired[bool]#

verbose: NotRequired[bool]#

easydel.infra.elarge_model.types

Contents

easydel.infra.elarge_model.types#