easydel.modules.deepseek_v3.modeling_deepseek_flax#

class easydel.modules.deepseek_v3.modeling_deepseek_flax.DeepseekV3Attention(*args: Any, **kwargs: Any)[source]#

Bases: AttentionModule

class easydel.modules.deepseek_v3.modeling_deepseek_flax.DeepseekV3DecoderLayer(*args: Any, **kwargs: Any)[source]#

Bases: Module

class easydel.modules.deepseek_v3.modeling_deepseek_flax.DeepseekV3ForCausalLM(*args: Any, **kwargs: Any)[source]#

Bases: EasyDeLBaseModule

DeepseekV3 model with a language modeling head for causal language modeling tasks.

This model extends the base DeepseekV3Model by adding a linear language modeling head on top of the transformer model. It incorporates a Mixture of Experts (MoE) architecture and is designed for generative tasks such as text generation.

class easydel.modules.deepseek_v3.modeling_deepseek_flax.DeepseekV3MLP(*args: Any, **kwargs: Any)[source]#

Bases: Module

class easydel.modules.deepseek_v3.modeling_deepseek_flax.DeepseekV3MoE(*args: Any, **kwargs: Any)[source]#

Bases: Module

moe_infer(x: Array, topk_ids: Array, topk_weight: Array) → Array[source]#
Parameters
  • x – Input tensor of shape [batch_size, hidden_dim]

  • topk_ids – Tensor of expert assignments [batch_size, top_k]

  • topk_weight – Tensor of expert weights [batch_size, top_k]

Returns

Output tensor of shape [batch_size, hidden_dim]

class easydel.modules.deepseek_v3.modeling_deepseek_flax.DeepseekV3Model(*args: Any, **kwargs: Any)[source]#

Bases: EasyDeLBaseModule

property frequencies#

Retrieves or computes the frequency components (e.g., for RoPE) from the configuration.

Uses self.config.get_basic_frequencies() and caches the result.

Returns

The frequency components, potentially cached.

Return type

jnp.ndarray

class easydel.modules.deepseek_v3.modeling_deepseek_flax.MoEGate(*args: Any, **kwargs: Any)[source]#

Bases: Module

easydel.modules.deepseek_v3.modeling_deepseek_flax.apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1)[source]#
easydel.modules.deepseek_v3.modeling_deepseek_flax.init_deepseek_rotary_embedding(dim, max_position_embeddings=2048, base=10000, method: Literal['linear', 'yarn', 'dynamic', None] = None, kwargs: Optional[dict] = None)[source]#
easydel.modules.deepseek_v3.modeling_deepseek_flax.rotate_half(x)[source]#
easydel.modules.deepseek_v3.modeling_deepseek_flax.yarn_find_correction_dim(num_rotations, dim, base=10000, max_position_embeddings=2048)[source]#
easydel.modules.deepseek_v3.modeling_deepseek_flax.yarn_find_correction_range(low_rot, high_rot, dim, base=10000, max_position_embeddings=2048)[source]#
easydel.modules.deepseek_v3.modeling_deepseek_flax.yarn_get_mscale(scale=1.0, mscale=1.0)[source]#
easydel.modules.deepseek_v3.modeling_deepseek_flax.yarn_linear_ramp_mask(min, max, dim)[source]#