# Advanced Troubleshooting
Deep-dive troubleshooting for complex Sonora issues.
## Performance Issues
### High CPU Usage
Symptoms:
- CPU usage > 80%
- Delayed responses
- Audio stuttering
Diagnosis:
# Check system load
sonoractl performance
# Monitor specific processes
ps aux | grep sonora
# Check Lavalink stats
curl http://localhost:2333/stats
Solutions:
# Enable performance mode
client = SonoraClient(..., performance_mode="overdrive")
# Reduce filter complexity
await player.clear_filters()
# Limit concurrent operations
# Create the semaphore once and share it between all callers; a semaphore
# created per call would never make anyone wait.
semaphore = asyncio.Semaphore(10)


async def play_limited(player, query):
    """Play ``query`` on ``player`` while holding one shared semaphore slot.

    At most 10 of these calls run ``player.play`` at the same time; further
    callers wait until a slot is released.
    """
    async with semaphore:
        await player.play(query)
### Memory Leaks
Symptoms:
- Increasing memory usage
- `MemoryError` in the Python client, or `OutOfMemoryError` from the Lavalink JVM
- System slowdown
Diagnosis:
import tracemalloc

# Begin recording allocations; only memory allocated after this call is traced.
tracemalloc.start()

# Take snapshots
snapshot1 = tracemalloc.take_snapshot()
# ... run operations ...
snapshot2 = tracemalloc.take_snapshot()

# Compare
# Differences are grouped by source line and returned largest change first,
# so the first ten entries are the most likely leak sites.
stats = snapshot2.compare_to(snapshot1, 'lineno')
for stat in stats[:10]:
    print(stat)
Solutions:
# Implement object pooling
track_pool = ObjectPool(Track, max_size=1000)
# Force garbage collection
import gc
gc.collect()
# Monitor queue size
if player.queue.length > 1000:
await player.queue.clear()
### Network Issues
Symptoms:
- Connection timeouts
- Packet loss
- High latency
Diagnosis:
# Test connectivity
ping lavalink.example.com
# Check ports
netstat -tlnp | grep 2333
# Monitor network traffic
iftop -i eth0
Solutions:
# Configure timeouts
node_config = {
"host": "lavalink.example.com",
"port": 2333,
"timeout": 30,
"retry_policy": {"max_retries": 5}
}
# Use connection pooling
connector = aiohttp.TCPConnector(limit=100, ttl_dns_cache=30)
## Audio Quality Issues
### Audio Distortion
Symptoms:
- Crackling sounds
- Clipping
- Poor quality
Diagnosis:
# Check Lavalink configuration
# Verify sample rate and bitrate
print(f"Sample rate: {player.sample_rate}")
print(f"Bitrate: {player.bitrate}")
Solutions:
# Adjust audio settings
await player.set_quality(bitrate=128, sample_rate=44100)
# Reset filters
await player.clear_filters()
# Check source quality
if track.bitrate < 128:
print("Low quality source")
### Sync Issues
Symptoms:
- Audio/video desync
- Delayed playback
Diagnosis:
# Check latency
latency = await player.get_latency()
print(f"Latency: {latency}ms")
# Verify NTP sync
import ntplib
client = ntplib.NTPClient()
response = client.request('pool.ntp.org')
print(f"NTP offset: {response.offset}")
Solutions:
# Enable sync correction
player.sync_correction = True
# Adjust buffer size
player.buffer_size = 1024 * 10 # 10KB buffer
# Use high-precision timing
import time
start = time.perf_counter()
# ... playback ...
end = time.perf_counter()
duration = end - start
## Plugin Issues
### Plugin Crashes
Symptoms:
- Plugin not loading
- Runtime errors
- System instability
Diagnosis:
# Check plugin logs
tail -f /var/log/sonora/plugins.log
# Validate plugin code
sonoractl plugin validate my_plugin
# Test in isolation
python -c "import my_plugin; print('Plugin OK')"
Solutions:
# Sandbox plugin execution
from sonora.security import PluginSandbox
sandbox = PluginSandbox()
try:
result = await sandbox.execute(plugin_code)
except Exception as e:
print(f"Plugin error: {e}")
# Implement error boundaries
import logging

logger = logging.getLogger(__name__)


async def safe_plugin_call(plugin_func, *args):
    """Await ``plugin_func(*args)`` and contain any failure it raises.

    Args:
        plugin_func: Callable supplied by a plugin; its return value is awaited.
        *args: Positional arguments forwarded to ``plugin_func``.

    Returns:
        Whatever ``plugin_func`` returns, or ``None`` when it raised. A caller
        that must tell a failure apart from a legitimate ``None`` result
        cannot rely on the return value alone.
    """
    try:
        return await plugin_func(*args)
    except Exception as e:
        # Deliberately broad: this is the boundary that keeps a faulty plugin
        # from propagating errors into the host. logger.exception keeps the
        # traceback, which the message alone would lose.
        logger.exception("Plugin error: %s", e)
        return None
### Permission Issues
Symptoms:
- Access denied errors
- Features not working
Diagnosis:
# Check plugin permissions
permissions = plugin.get_permissions()
print(f"Plugin permissions: {permissions}")
# Verify required permissions
required = ["track.read", "player.control"]
missing = [p for p in required if p not in permissions]
if missing:
print(f"Missing permissions: {missing}")
Solutions:
# Request additional permissions
plugin.request_permissions(["queue.modify"])
# Implement permission checks
if plugin.has_permission("track.modify"):
await modify_track(track)
else:
raise PermissionError("Insufficient permissions")
## Database Issues
### Connection Problems
Symptoms:
- Database timeouts
- Connection refused
- Data corruption
Diagnosis:
# Test database connection
mysqladmin ping -h localhost
# Check connection pool
SHOW PROCESSLIST;
# Verify data integrity
CHECK TABLE tracks;
Solutions:
# Configure connection pooling
db_config = {
"pool_size": 10,
"max_overflow": 20,
"pool_timeout": 30,
"pool_recycle": 3600
}
# Implement retry logic
async def retry_db_operation(operation, max_retries=3):
    """Run ``operation`` until it succeeds, backing off exponentially.

    Args:
        operation: Zero-argument callable whose return value is awaited.
        max_retries: Total number of attempts (not extra retries on top of
            the first call). Must be at least 1.

    Returns:
        The result of the first attempt that completes without error.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        DatabaseError: Re-raised unchanged when the final attempt fails.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and the function would
        # return None without ever calling the operation.
        raise ValueError("max_retries must be at least 1")
    for attempt in range(max_retries):
        try:
            return await operation()
        except DatabaseError:
            if attempt == max_retries - 1:
                raise
            # Wait 1s, 2s, 4s, ... before the next attempt.
            await asyncio.sleep(2 ** attempt)
### Data Corruption
Symptoms:
- Invalid data
- Missing records
- Inconsistent state
Diagnosis:
-- Check for orphaned records
-- NOT EXISTS is used instead of NOT IN: if any playlists.track_id is NULL,
-- NOT IN is never true for any row and the query silently returns nothing.
SELECT * FROM tracks t
WHERE NOT EXISTS (SELECT 1 FROM playlists p WHERE p.track_id = t.id);
-- Validate foreign keys
SELECT * FROM queue_items qi
LEFT JOIN tracks t ON qi.track_id = t.id
WHERE t.id IS NULL;
Solutions:
# Implement data validation
def validate_track_data(track):
    """Check that ``track`` carries every required field.

    Args:
        track: Mapping-like object exposing ``get`` (for example a dict).

    Raises:
        ValidationError: Naming the first required field that is absent or
            holds a falsy value.
    """
    required_fields = ('title', 'author', 'uri')
    for field in required_fields:
        # A falsy value (empty string, None) is treated the same as a
        # missing key.
        if not track.get(field):
            raise ValidationError(f"Missing {field}")
# Use transactions
async with db.transaction():
await db.execute("INSERT INTO tracks ...")
await db.execute("INSERT INTO queue_items ...")
## Cluster Issues
### Node Failures
Symptoms:
- Node disconnections
- Load imbalance
- Service degradation
Diagnosis:
# Check node health
sonoractl node status
# Monitor load distribution
sonoractl cluster stats
# Check failover logs
tail -f /var/log/sonora/cluster.log
Solutions:
# Configure health checks
node_config = {
"health_check_interval": 30,
"health_check_timeout": 5,
"failover_enabled": True
}
# Implement load balancing
load_balancer = LeastLoadedBalancer(nodes)
selected_node = load_balancer.select_node(track)
### Synchronization Issues
Symptoms:
- State inconsistencies
- Duplicate operations
- Race conditions
Diagnosis:
# Check vector clocks
state1 = node1.get_state_vector()
state2 = node2.get_state_vector()
if state1 != state2:
print("State divergence detected")
# Monitor event ordering
events = event_log.get_recent_events()
for i in range(1, len(events)):
if events[i].timestamp < events[i-1].timestamp:
print("Event ordering violation")
Solutions:
# Use distributed locks
async with distributed_lock("queue_operation"):
await player.queue.add(track)
# Implement conflict resolution
def resolve_conflict(local_state, remote_state):
    """Choose between two conflicting states with a last-write-wins rule.

    Args:
        local_state: This node's state; must expose ``timestamp``.
        remote_state: The other node's state; must expose ``timestamp``.

    Returns:
        ``local_state`` when its timestamp is strictly newer, otherwise
        ``remote_state``. On equal timestamps the remote state wins.
    """
    # Last-write-wins strategy
    if local_state.timestamp > remote_state.timestamp:
        return local_state
    return remote_state
## Recovery Procedures
### Emergency Shutdown
# Graceful shutdown
sonoractl shutdown --graceful
# Force shutdown
sonoractl shutdown --force
# Emergency stop
pkill -9 sonora
### Data Recovery
# Restore from backup
sonoractl snapshot restore latest
# Rebuild indexes
sonoractl db rebuild-indexes
# Validate data
sonoractl db validate
### Service Restoration
# Start in maintenance mode
sonoractl start --maintenance
# Run diagnostics
sonoractl doctor --full
# Enable normal operation
sonoractl maintenance off
## Monitoring & Alerting
### Key Metrics
# Performance metrics
metrics = {
    "cpu_usage": psutil.cpu_percent(),
    "memory_usage": psutil.virtual_memory().percent,
    "active_connections": len(client.players),
    "queue_length": sum(p.queue.length for p in client.players.values()),
    # Guard the ratio: when total_requests is 0 a bare division raises
    # ZeroDivisionError, so report an error rate of 0.0 instead.
    "error_rate": error_counter / total_requests if total_requests else 0.0,
}
# Alert thresholds
if metrics["cpu_usage"] > 90:
    send_alert("High CPU usage")
if metrics["memory_usage"] > 85:
    send_alert("High memory usage")
### Log Analysis
# Search for errors
grep "ERROR" /var/log/sonora/*.log | tail -20
# Analyze performance
# The END block checks count first: with no matching lines, sum/count would
# abort awk with a division-by-zero error.
grep "duration" /var/log/sonora/performance.log | \
  awk '{sum+=$2; count++} END {if (count) print sum/count; else print "no samples"}'
# Monitor trends
tail -f /var/log/sonora/*.log | grep -E "(WARN|ERROR)"
This comprehensive troubleshooting guide helps diagnose and resolve complex issues in Sonora deployments.