diff --git a/.env.default b/.env.default index 2ea4d63..1ff0207 100644 --- a/.env.default +++ b/.env.default @@ -79,11 +79,25 @@ AE_DB_USERNAME=aether_dev AE_DB_PASSWORD=XXXX # Connection Tuning +# Seconds to wait when establishing a new connection before giving up. +# Lower values fail fast on DB outage rather than hanging requests. AE_DB_CONNECTION_TIMEOUT=7 + +# Seconds before a pooled connection is recycled (closed and reopened). +# Prevents "MySQL server has gone away" errors caused by MariaDB's wait_timeout. +# Must be less than MariaDB's wait_timeout (default 28800s / 8 hours). +# 900s (15 min) is a safe conservative value for active workloads. AE_DB_POOL_RECYCLE=900 -# Pool size per API replica. Total max DB connections = AE_API_REPLICAS × (AE_DB_POOL_SIZE + AE_DB_POOL_MAX_OVERFLOW) -# With defaults (10+20) and 3 replicas = 90 max connections. MARIADB_MAX_CONNECTIONS must be higher. + +# Connections held open per API replica at idle (the "warm" pool). +# Each replica keeps up to this many persistent connections to MariaDB. AE_DB_POOL_SIZE=10 + +# Additional connections a replica can open beyond AE_DB_POOL_SIZE under burst load. +# These are created on demand and closed when the burst subsides. +# Max connections per replica = AE_DB_POOL_SIZE + AE_DB_POOL_MAX_OVERFLOW. +# Total max DB connections across all replicas = AE_API_REPLICAS × (AE_DB_POOL_SIZE + AE_DB_POOL_MAX_OVERFLOW). +# Example: 3 replicas × (10 + 20) = 90 max connections. MARIADB_MAX_CONNECTIONS must exceed this. NOTE(review): if each Gunicorn worker process creates its own pool, also multiply by AE_API_GUNICORN_WORKERS — confirm. AE_DB_POOL_MAX_OVERFLOW=20 # ------------------------------------------------------------------------------ @@ -96,23 +110,44 @@ AE_REDIS_PORT=6379 # ------------------------------------------------------------------------------ # API SETTINGS (FastAPI) # ------------------------------------------------------------------------------ -# Number of API container instances to run (Docker Compose scaling) + +# Number of API container instances (Docker Compose replica scaling). 
+# Each replica is an independent container with its own Gunicorn process and +# connection pool. Total max DB connections = AE_API_REPLICAS × (AE_DB_POOL_SIZE + AE_DB_POOL_MAX_OVERFLOW). +# Increase for horizontal scaling across CPU cores. On a single-node Linode, +# 2-4 replicas are typical; more replicas won't help if the DB is the bottleneck. AE_API_REPLICAS=3 -# Gunicorn / Uvicorn Tuning -# AE_API_GUNICORN_TIMEOUT: worker timeout in seconds. Default in gunicorn_conf.py -# is 120s. Raise for endpoints that run long ffmpeg operations (clip_video, etc.) -# The dev .env typically sets this to 900 to accommodate 5-15 min video jobs. +# --- Gunicorn / Uvicorn Tuning --- + +# Internal port Gunicorn listens on inside the container. Nginx proxies to this. +# Each replica uses this same port within its own network namespace. AE_API_GUNICORN_PORT=5065 + +# Worker timeout in seconds. A worker that stays silent longer than this (e.g. blocked +# on one long request) is killed and restarted by Gunicorn. Default in gunicorn_conf.py is 120s. +# Raise for endpoints that run long ffmpeg operations (clip_video, convert_file, etc.). +# Dev typically uses 900s to accommodate 5-15 min video jobs. AE_API_GUNICORN_TIMEOUT=900 -AE_API_GUNICORN_WORKERS=2 + +# Uvicorn worker processes per replica. Each worker handles requests independently +# using async I/O, but SQLAlchemy DB calls are synchronous and block the worker. +# More workers = more parallel DB queries. Recommended: 2-4 per replica. NOTE(review): each worker process may also hold its own DB connection pool — confirm and size MARIADB_MAX_CONNECTIONS accordingly. +# Total parallel DB query capacity ≈ AE_API_REPLICAS × AE_API_GUNICORN_WORKERS. +# Stress testing at 4 workers/replica yielded ~2x throughput vs 2 workers (14 req/s vs 7.5 req/s). +# Rule of thumb: (2 × CPU cores) + 1 per replica, but DB throughput caps before CPU becomes the limit. +AE_API_GUNICORN_WORKERS=4 + +# Threads per Gunicorn worker. Uvicorn workers use async I/O, so threading provides +# minimal benefit here. Leave at 1 unless explicitly benchmarked otherwise. 
AE_API_GUNICORN_THREADS=1 # Security & CORS -# JWT_KEY should be a 22+ character secret string +# AE_API_JWT_KEY should be a 22+ character secret string. Rotate if compromised. AE_API_JWT_KEY=XXXX -# Regex for allowed CORS origins +# Regex for allowed CORS origins. Browsers block cross-origin requests from non-matching origins. +# Extend the pattern if adding new domains or local dev ports. NOTE(review): the dot before `localhost` is unescaped (matches any character) — confirm this is intended. AE_API_ORIGINS_REGEX="(https://.*\.oneskyit\.com)|(http://.*\.oneskyit\.com)|(http://.*.localhost)|(http://.*.localhost:5173)" # ------------------------------------------------------------------------------