easydel.inference.esurge.metrics

Contents

easydel.inference.esurge.metrics#

eSurge Metrics Collection System.

class easydel.inference.esurge.metrics.CacheMetrics(timestamp: float, total_pages: int = 0, used_pages: int = 0, free_pages: int = 0, cache_hit_rate: float = 0.0, page_allocation_rate: float = 0.0, page_free_rate: float = 0.0)[source]#

Bases: object

Metrics for KV cache operations.

cache_hit_rate: float = 0.0#
free_pages: int = 0#
page_allocation_rate: float = 0.0#
page_free_rate: float = 0.0#
timestamp: float#
total_pages: int = 0#
used_pages: int = 0#
class easydel.inference.esurge.metrics.MetricsCollector(log_file: str | None = None, log_interval: float = 10.0, history_size: int = 1000, enable_detailed_logging: bool = True)[source]#

Bases: object

Centralized metrics collection and logging system for eSurge.

add_generated_tokens(request_id: str, num_tokens: int) → None[source]#

Add generated tokens to a request’s metrics.

complete_request(request_id: str, finish_reason: str | None = None, error: str | None = None) → None[source]#

Complete tracking for a request.

export_metrics(file_path: str, format: str = 'json') → None[source]#

Export all metrics to a file.

get_system_metrics(window_seconds: float = 60.0) → SystemMetrics[source]#

Get aggregated system metrics for the specified time window.

log_summary(force: bool = False) → None[source]#

Log a summary of current metrics.

record_cache_event(event: str, details: dict[str, Any] | None = None) → None[source]#

Record lifecycle events for the KV cache.

record_cache_metrics(total_pages: int, used_pages: int, cache_hit_rate: float = 0.0, page_allocation_rate: float = 0.0, page_free_rate: float = 0.0) → None[source]#

Record KV cache metrics.

record_first_token(request_id: str) → None[source]#

Record when the first token is generated for a request.

record_runner_metrics(execution_time: float, batch_size: int, num_tokens: int, memory_usage: dict[str, Any] | None = None) → None[source]#

Record model runner performance metrics.

record_scheduler_metrics(num_waiting: int, num_running: int, num_scheduled_tokens: int, num_preempted: int = 0, batch_size: int = 0, schedule_time: float = 0.0) → None[source]#

Record scheduler performance metrics.

reset_metrics() → None[source]#

Reset all metrics and counters.

start_request(request_id: str, prompt_tokens: int = 0) → None[source]#

Start tracking metrics for a new request.

class easydel.inference.esurge.metrics.ModelRunnerMetrics(timestamp: float, execution_time: float = 0.0, batch_size: int = 0, num_tokens: int = 0, tokens_per_second: float = 0.0, memory_usage: dict[str, Any] | None = None)[source]#

Bases: object

Metrics for model runner operations.

batch_size: int = 0#
execution_time: float = 0.0#
memory_usage: dict[str, Any] | None = None#
num_tokens: int = 0#
timestamp: float#
tokens_per_second: float = 0.0#
class easydel.inference.esurge.metrics.RequestMetrics(request_id: str, start_time: float, end_time: float | None = None, first_token_time: float | None = None, prompt_tokens: int = 0, generated_tokens: int = 0, total_tokens: int = 0, tokens_per_second: float = 0.0, time_to_first_token: float | None = None, total_latency: float | None = None, finish_reason: str | None = None, error: str | None = None)[source]#

Bases: object

Metrics for a single request.

end_time: float | None = None#
error: str | None = None#
finish_reason: str | None = None#
first_token_time: float | None = None#
generated_tokens: int = 0#
prompt_tokens: int = 0#
request_id: str#
start_time: float#
time_to_first_token: float | None = None#
tokens_per_second: float = 0.0#
total_latency: float | None = None#
total_tokens: int = 0#
class easydel.inference.esurge.metrics.SchedulerMetrics(timestamp: float, num_waiting_requests: int = 0, num_running_requests: int = 0, num_scheduled_tokens: int = 0, num_preempted_requests: int = 0, batch_size: int = 0, schedule_time: float = 0.0)[source]#

Bases: object

Metrics for scheduler operations.

batch_size: int = 0#
num_preempted_requests: int = 0#
num_running_requests: int = 0#
num_scheduled_tokens: int = 0#
num_waiting_requests: int = 0#
schedule_time: float = 0.0#
timestamp: float#
class easydel.inference.esurge.metrics.SystemMetrics(timestamp: float, total_requests_completed: int = 0, total_requests_failed: int = 0, total_tokens_generated: int = 0, average_latency: float = 0.0, average_ttft: float = 0.0, average_throughput: float = 0.0, requests_per_second: float = 0.0)[source]#

Bases: object

System-wide metrics summary.

average_latency: float = 0.0#
average_throughput: float = 0.0#
average_ttft: float = 0.0#
requests_per_second: float = 0.0#
timestamp: float#
total_requests_completed: int = 0#
total_requests_failed: int = 0#
total_tokens_generated: int = 0#
easydel.inference.esurge.metrics.get_metrics_collector() → easydel.inference.esurge.metrics.MetricsCollector | None[source]#

Get the global metrics collector instance.

easydel.inference.esurge.metrics.initialize_metrics(log_file: str | None = None, log_interval: float = 10.0, history_size: int = 1000, enable_detailed_logging: bool = True) → MetricsCollector[source]#

Initialize the global metrics collector.

easydel.inference.esurge.metrics.log_metrics_summary() → None[source]#

Log a summary of current metrics if the collector is initialized.