Monitoring¶
Monitor Fast LiteLLM performance and health in production environments.
Health Checks¶
Basic Health Check¶
import fast_litellm
# Run the library's health check once and print the fields of interest.
report = fast_litellm.health_check()
print(f"Status: {report['status']}")
print(f"Rust Available: {report['rust_available']}")
print(f"Components: {report['components']}")
HTTP Health Endpoint¶
Integrate with your web framework:
from flask import Flask, jsonify
import fast_litellm
app = Flask(__name__)


@app.route('/health')
def health():
    """Serve the health report as JSON: HTTP 200 when status is 'ok', else 503."""
    report = fast_litellm.health_check()
    if report['status'] == 'ok':
        return jsonify(report), 200
    return jsonify(report), 503
@app.route('/health/detailed')
def health_detailed():
    """Serve health, feature status and patch status in a single JSON payload."""
    payload = {
        'health': fast_litellm.health_check(),
        'features': fast_litellm.get_feature_status(),
        'patch_status': fast_litellm.get_patch_status(),
    }
    return jsonify(payload)
Performance Metrics¶
Collecting Metrics¶
Fast LiteLLM automatically collects performance metrics:
import fast_litellm
# Print every collected metric as "name: value"
all_stats = fast_litellm.get_performance_stats()
for metric, reading in all_stats.items():
    print(f"{metric}: {reading}")

# Narrow the query to a single component
routing_stats = fast_litellm.get_performance_stats(component="routing")
Recording Custom Metrics¶
Record your own performance data:
import fast_litellm
import time
start = time.perf_counter()
result = None
success = False
try:
    # Your operation
    result = do_something()
    success = True
finally:
    # Record the sample even when the operation raises, so failures show up
    # in the metrics instead of every recorded sample claiming success.
    duration_ms = (time.perf_counter() - start) * 1000
    fast_litellm.record_performance(
        component="my_component",
        operation="do_something",
        duration_ms=duration_ms,
        success=success,
        input_size=len(input_data),
        # No output exists when the operation failed; report a size of 0.
        output_size=len(result) if success else 0,
    )
Comparing Implementations¶
Compare Rust vs Python performance:
import fast_litellm
# Fetch the comparison between the Rust and Python rate limiter components.
comparison = fast_litellm.compare_implementations(
    rust_component="rust_rate_limiter",
    python_component="python_rate_limiter",
)

# Missing keys are shown as 'N/A'.
rust_avg = comparison.get('rust_avg_ms', 'N/A')
python_avg = comparison.get('python_avg_ms', 'N/A')
speedup = comparison.get('speedup', 'N/A')

print(f"Rust avg: {rust_avg}ms")
print(f"Python avg: {python_avg}ms")
print(f"Speedup: {speedup}x")
Feature Status Monitoring¶
Check Feature Status¶
import fast_litellm
# Print one summary line per feature.
feature_report = fast_litellm.get_feature_status()
for name, status in feature_report.items():
    if status.get('enabled'):
        enabled = "ON"
    else:
        enabled = "OFF"
    # Absent fields fall back to 0 errors and a 100% rollout.
    errors = status.get('errors', 0)
    rollout = status.get('rollout_percentage', 100)
    print(f"{name}: {enabled} (errors: {errors}, rollout: {rollout}%)")
Monitor Error Rates¶
import fast_litellm
feature_report = fast_litellm.get_feature_status()
for name, status in feature_report.items():
    errors = status.get('errors', 0)
    # Nothing to report until a feature has more than 5 errors.
    if errors <= 5:
        continue
    print(f"WARNING: {name} has {errors} errors")

    # Auto-disabled after 10 errors by default
    if errors >= 10 and status.get('enabled'):
        print(f"CRITICAL: {name} should be disabled")
Component-Specific Monitoring¶
Rate Limiter Stats¶
import fast_litellm
# Print the rate limiter counters; missing keys print as 0.
rate_stats = fast_litellm.get_rate_limit_stats()
print(f"Total checks: {rate_stats.get('total_checks', 0)}")
print(f"Allowed: {rate_stats.get('allowed_count', 0)}")
print(f"Denied: {rate_stats.get('denied_count', 0)}")
Connection Pool Stats¶
import fast_litellm
# Print the connection pool counters; missing keys print as 0.
pool = fast_litellm.get_connection_pool_stats()
print(f"Total connections: {pool.get('total_connections', 0)}")
print(f"Active: {pool.get('active_connections', 0)}")
print(f"Idle: {pool.get('idle_connections', 0)}")
Exporting Data¶
JSON Export¶
import fast_litellm
import json
# Export all data
data = fast_litellm.export_performance_data(format="json")
# Parse the exported text so it can be inspected as Python objects
parsed = json.loads(data)

# Save to file. Set the encoding explicitly: JSON interchange is UTF-8, and
# the default for open() depends on the platform.
with open('metrics.json', 'w', encoding='utf-8') as f:
    f.write(data)
CSV Export¶
import fast_litellm
csv_data = fast_litellm.export_performance_data(format="csv")

# newline='' writes the exported line endings unchanged instead of letting
# text mode translate them; the explicit encoding avoids the platform default.
with open('metrics.csv', 'w', newline='', encoding='utf-8') as f:
    f.write(csv_data)
Component-Specific Export¶
import fast_litellm
# Restrict the export to the rate limiter component
rate_limit_data = fast_litellm.export_performance_data(
    format="json",
    component="rate_limiter",
)
Integration with Monitoring Systems¶
Prometheus¶
from prometheus_client import Gauge, Counter
import fast_litellm
# Define metrics
rust_available = Gauge('fast_litellm_rust_available', 'Rust acceleration available')
feature_enabled = Gauge('fast_litellm_feature_enabled', 'Feature enabled', ['feature'])
feature_errors = Counter('fast_litellm_feature_errors', 'Feature errors', ['feature'])

# Error count last seen per feature. Counter.inc() adds to the running total,
# so passing the full count on every refresh would inflate the exported value;
# only the change since the previous refresh is applied.
# NOTE(review): assumes status['errors'] is a running total per feature - confirm.
_last_error_counts = {}


def update_metrics():
    """Refresh the Prometheus metrics from the current health and feature status.

    Safe to call repeatedly: the error counter advances only by the number of
    errors reported since the previous call.
    """
    # Health status
    health = fast_litellm.health_check()
    rust_available.set(1 if health['rust_available'] else 0)

    # Feature status
    features = fast_litellm.get_feature_status()
    for name, status in features.items():
        feature_enabled.labels(feature=name).set(1 if status.get('enabled') else 0)

        # labels() registers the series even when there is nothing to add yet.
        error_counter = feature_errors.labels(feature=name)
        errors = status.get('errors', 0)
        delta = errors - _last_error_counts.get(name, 0)
        # A Counter cannot decrease; if the reported count dropped, just
        # resynchronise the baseline without incrementing.
        if delta > 0:
            error_counter.inc(delta)
        _last_error_counts[name] = errors
Datadog¶
from datadog import statsd
import fast_litellm
def send_metrics():
    """Send health, performance and connection-pool gauges through the statsd client."""
    # Health check: 1 when the reported status is 'ok', otherwise 0
    is_ok = fast_litellm.health_check()['status'] == 'ok'
    statsd.gauge('fast_litellm.healthy', 1 if is_ok else 0)

    # Performance stats: only int/float values are forwarded as gauges
    for key, value in fast_litellm.get_performance_stats().items():
        if not isinstance(value, (int, float)):
            continue
        statsd.gauge(f'fast_litellm.{key}', value)

    # Component stats
    active = fast_litellm.get_connection_pool_stats().get('active_connections', 0)
    statsd.gauge('fast_litellm.connections.active', active)
CloudWatch¶
import boto3
import fast_litellm
cloudwatch = boto3.client('cloudwatch')


def publish_metrics():
    """Publish one 'Healthy' datapoint (1 or 0) to the 'FastLiteLLM' namespace."""
    is_ok = fast_litellm.health_check()['status'] == 'ok'
    healthy_metric = {
        'MetricName': 'Healthy',
        'Value': 1 if is_ok else 0,
        'Unit': 'Count',
    }
    cloudwatch.put_metric_data(
        Namespace='FastLiteLLM',
        MetricData=[healthy_metric],
    )
Alerting¶
Error Threshold Alerts¶
import fast_litellm
def check_alerts(features=None, pool_stats=None):
    """Build a list of alert messages from feature errors and pool usage.

    Args:
        features: Mapping of feature name to its status dict. When None, it is
            fetched with fast_litellm.get_feature_status().
        pool_stats: Connection pool stats dict. When None, it is fetched with
            fast_litellm.get_connection_pool_stats().

    Returns:
        A list of alert strings; empty when no threshold is crossed.
    """
    if features is None:
        features = fast_litellm.get_feature_status()
    if pool_stats is None:
        pool_stats = fast_litellm.get_connection_pool_stats()

    alerts = []

    # Check feature errors
    for name, status in features.items():
        errors = status.get('errors', 0)
        # Test the higher threshold first: checking >= 5 first would also match
        # every count >= 10 and make the CRITICAL branch unreachable.
        if errors >= 10:
            alerts.append(f"CRITICAL: {name} disabled due to errors")
        elif errors >= 5:
            alerts.append(f"HIGH: {name} has {errors} errors")

    # Check connection pool
    active = pool_stats.get('active_connections', 0)
    if active > 100:
        alerts.append(f"WARNING: High connection count: {active}")

    return alerts
Recommendations¶
import fast_litellm
# Print each recommendation as "[PRIORITY] message".
for entry in fast_litellm.get_recommendations():
    level = entry.get('priority', 'medium').upper()
    text = entry.get('message', 'Unknown recommendation')
    print(f"[{level}] {text}")
Dashboard Example¶
Create a simple monitoring dashboard:
import fast_litellm
from flask import Flask, render_template_string
# Flask application object the dashboard route is registered on.
app = Flask(__name__)
# Template for the dashboard page. It reads three context variables:
# `health` (status, rust_available), `features` (name -> status with
# enabled/errors) and `pool_stats` (active_connections, idle_connections).
DASHBOARD_TEMPLATE = """
<!DOCTYPE html>
<html>
<head><title>Fast LiteLLM Dashboard</title></head>
<body>
<h1>Fast LiteLLM Status</h1>
<h2>Health</h2>
<p>Status: {{ health.status }}</p>
<p>Rust Available: {{ health.rust_available }}</p>
<h2>Features</h2>
<table border="1">
<tr><th>Feature</th><th>Enabled</th><th>Errors</th></tr>
{% for name, status in features.items() %}
<tr>
<td>{{ name }}</td>
<td>{{ 'Yes' if status.enabled else 'No' }}</td>
<td>{{ status.errors }}</td>
</tr>
{% endfor %}
</table>
<h2>Connection Pool</h2>
<p>Active: {{ pool_stats.active_connections }}</p>
<p>Idle: {{ pool_stats.idle_connections }}</p>
</body>
</html>
"""
@app.route('/dashboard')
def dashboard():
    """Render DASHBOARD_TEMPLATE with current health, feature and pool data."""
    context = {
        'health': fast_litellm.health_check(),
        'features': fast_litellm.get_feature_status(),
        'pool_stats': fast_litellm.get_connection_pool_stats(),
    }
    return render_template_string(DASHBOARD_TEMPLATE, **context)
Next Steps¶
- Configuration - Fine-tune monitoring settings
- Performance Tuning - Optimize based on metrics