Dev ops prep
This commit is contained in:
199
scripts/ops/health-check.sh
Normal file
199
scripts/ops/health-check.sh
Normal file
@@ -0,0 +1,199 @@
|
||||
#!/bin/bash
# =============================================================================
# IGNY8 Health Check Script
# =============================================================================
# Checks health of all services and reports status.
#
# Usage: ./health-check.sh [--quiet]
#   --quiet   Silence the per-check report lines and the header/summary banner.
#
# Exit status:
#   0  no check failed (warnings do not count as failures)
#   1  at least one check failed
# =============================================================================

set -e

# First positional argument; the reporting code compares it against the
# literal "--quiet". Defaults to empty so the expansion never trips `set -u`.
QUIET="${1:-}"

# Aggregate result: check_failed sets this to 1 and main exits with it.
EXIT_CODE=0

# Colors (ANSI escape sequences; stored unexpanded and interpreted at print time)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'
|
||||
|
||||
# Output functions

#######################################
# Print a message unless quiet mode is active.
# Globals:   QUIET (read)
# Arguments: $1 - message; backslash escapes (such as color codes) are expanded
# Outputs:   the message followed by a newline on stdout, or nothing when
#            QUIET is "--quiet"
#######################################
output() {
    if [[ "${QUIET:-}" != "--quiet" ]]; then
        # printf '%b' expands escapes the same way `echo -e` does, but never
        # mistakes the message for an option (echo would swallow "-n" or "-e").
        printf '%b\n' "${1:-}"
    fi
}
|
||||
|
||||
#######################################
# Report a check that succeeded.
# Globals:   GREEN, NC (read)
# Arguments: $1 - text describing the check result
# Outputs:   delegates printing to output()
#######################################
check_passed() {
    local text="$1"
    output "${GREEN}✅ ${text}${NC}"
}
|
||||
|
||||
#######################################
# Report a check that failed and mark the whole run as failed.
# Globals:   RED, NC (read), EXIT_CODE (written: set to 1)
# Arguments: $1 - text describing the check result
# Outputs:   delegates printing to output()
#######################################
check_failed() {
    local text="$1"
    output "${RED}❌ ${text}${NC}"
    # Recorded even when output() prints nothing.
    EXIT_CODE=1
}
|
||||
|
||||
#######################################
# Report a check that needs attention; does not touch EXIT_CODE.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - text describing the check result
# Outputs:   delegates printing to output()
#######################################
check_warn() {
    local text="$1"
    output "${YELLOW}⚠️ ${text}${NC}"
}
|
||||
|
||||
#######################################
# Check HTTP endpoint
# Requests a URL with curl and reports it healthy only when the transfer
# itself succeeded AND the response status was 200.
# Arguments: $1 - display name used in the report line
#            $2 - URL to request
#            $3 - maximum seconds for the whole request (default: 10)
# Outputs:   one report line via check_passed / check_failed
# Returns:   0 when healthy, 1 otherwise
#######################################
check_endpoint() {
    local name="$1"
    local url="$2"
    local timeout="${3:-10}"

    local response
    local curl_status=0
    local http_code
    local time_total

    # curl prints the -w string even when the transfer fails, so its exit
    # status is captured separately rather than appending a fallback string to
    # whatever was already printed. A response that delivered "200" and then
    # timed out must not be reported as healthy.
    response=$(curl -s -o /dev/null -w '%{http_code}|%{time_total}' \
        --max-time "${timeout}" "${url}" 2>/dev/null) || curl_status=$?

    http_code="${response%%|*}"
    time_total="${response#*|}"

    # curl missing or printing nothing: fall back to the "no response" code.
    if [[ ! "${http_code}" =~ ^[0-9]{3}$ ]]; then
        http_code="000"
    fi

    if (( curl_status == 0 )) && [[ "${http_code}" == "200" ]]; then
        check_passed "${name}: OK (${time_total}s)"
        return 0
    fi

    if (( curl_status != 0 )); then
        check_failed "${name}: FAILED (HTTP ${http_code}, curl exit ${curl_status})"
    else
        check_failed "${name}: FAILED (HTTP ${http_code})"
    fi
    return 1
}
|
||||
|
||||
#######################################
# Check container status
# Verifies that a container with exactly the given name is listed by
# `docker ps`, then reports its healthcheck state.
# Arguments: $1 - container name (matched literally, whole name)
# Outputs:   one report line via check_passed / check_warn / check_failed
# Returns:   0 when the container is running (whatever its health state),
#            1 when it is not running
#######################################
check_container() {
    local name="$1"
    local status

    # -x -F: whole-line literal comparison, so characters such as "." in the
    # name are not interpreted as regex metacharacters.
    if ! docker ps --format '{{.Names}}' | grep -qxF -- "${name}"; then
        check_failed "Container ${name}: NOT RUNNING"
        return 1
    fi

    # Containers without a healthcheck have no .State.Health; the template
    # handles that case itself instead of relying on a template error.
    # Declaration and assignment are split so a docker failure is not masked.
    status=$(docker inspect \
        --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' \
        "${name}" 2>/dev/null) || status="none"

    case "${status}" in
        healthy)
            check_passed "Container ${name}: Running (healthy)"
            ;;
        none)
            check_passed "Container ${name}: Running (no healthcheck)"
            ;;
        *)
            check_warn "Container ${name}: Running (${status})"
            ;;
    esac
    return 0
}
|
||||
|
||||
#######################################
# Check disk space
# Reports the used-space percentage of the filesystem holding a path.
# Arguments: $1 - path on the filesystem to inspect
#            $2 - warning threshold in percent (default: 80)
#            $3 - critical threshold in percent (default: 90)
# Outputs:   one report line via check_passed / check_warn / check_failed
# Returns:   always 0; failures are signalled through check_failed so that a
#            caller running under `set -e` is not aborted
#######################################
check_disk() {
    local path="$1"
    local threshold="${2:-80}"
    local critical="${3:-90}"
    local usage

    # -P guarantees one output line per filesystem (long device names are not
    # wrapped), so the second line is always the data row and column 5 the
    # capacity percentage.
    usage=$(df -P -- "${path}" | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

    # An empty or non-numeric value (missing path, df failure) would otherwise
    # evaluate as 0 in the comparisons below and be reported as healthy.
    if [[ ! "${usage}" =~ ^[0-9]+$ ]]; then
        check_failed "Disk ${path}: unable to determine usage"
        return 0
    fi

    if (( usage < threshold )); then
        check_passed "Disk ${path}: ${usage}% used"
    elif (( usage < critical )); then
        check_warn "Disk ${path}: ${usage}% used (warning)"
    else
        check_failed "Disk ${path}: ${usage}% used (critical)"
    fi
    return 0
}
|
||||
|
||||
#######################################
# Check Redis connection
# Runs `redis-cli ping` inside a container and looks for PONG in the reply.
# Arguments: $1 - container to exec into (default: redis)
# Outputs:   one report line via check_passed / check_failed
# Returns:   0 when PONG was received, 1 otherwise
#######################################
check_redis() {
    local container="${1:-redis}"

    # stderr is discarded on purpose: a missing container or docker daemon
    # simply yields no PONG and is reported as "NOT RESPONDING".
    if docker exec "${container}" redis-cli ping 2>/dev/null | grep -qF "PONG"; then
        check_passed "Redis: Connected"
        return 0
    fi

    check_failed "Redis: NOT RESPONDING"
    return 1
}
|
||||
|
||||
#######################################
# Check PostgreSQL connection
# Runs `pg_isready` inside a container and looks for "accepting" in its output.
# Arguments: $1 - container to exec into (default: postgres)
#            $2 - user name passed to `pg_isready -U` (default: igny8)
# Outputs:   one report line via check_passed / check_failed
# Returns:   0 when the server reports it is accepting connections, 1 otherwise
#######################################
check_postgres() {
    local container="${1:-postgres}"
    local user="${2:-igny8}"

    # stderr is discarded on purpose: a missing container or docker daemon
    # simply yields no "accepting" line and is reported as "NOT READY".
    if docker exec "${container}" pg_isready -U "${user}" 2>/dev/null \
        | grep -qF "accepting"; then
        check_passed "PostgreSQL: Accepting connections"
        return 0
    fi

    check_failed "PostgreSQL: NOT READY"
    return 1
}
|
||||
|
||||
#######################################
# Check Celery queue
# Reads the length of the Redis list "celery" (via `redis-cli llen` in the
# "redis" container) and grades it: below 50 passes, below 200 warns,
# anything else fails.
# Arguments: none
# Outputs:   one report line via check_passed / check_warn / check_failed
#######################################
check_celery_queue() {
    local queue_length

    # Declaration and assignment are split so a docker failure is not masked.
    queue_length=$(docker exec redis redis-cli llen celery 2>/dev/null) \
        || queue_length=""

    # redis-cli can exit 0 while printing an error text (for example when
    # authentication is required). Anything that is not a plain integer is
    # treated as "cannot check" rather than being fed to arithmetic tests.
    if [[ ! "${queue_length}" =~ ^[0-9]+$ ]]; then
        check_warn "Celery queue: Cannot check"
    elif (( queue_length < 50 )); then
        check_passed "Celery queue: ${queue_length} tasks"
    elif (( queue_length < 200 )); then
        check_warn "Celery queue: ${queue_length} tasks (high)"
    else
        check_failed "Celery queue: ${queue_length} tasks (critical)"
    fi
}
|
||||
|
||||
#######################################
# Main health check
# Runs every check, prints the report and exits with the aggregate status.
# Globals:   QUIET (read), EXIT_CODE (read; check_failed sets it),
#            GREEN, RED, NC (read)
# Arguments: none are used; QUIET is taken from the script's first argument
#            when the file is loaded
# Outputs:   report on stdout; nothing at all when QUIET is "--quiet"
# Returns:   does not return: exits with EXIT_CODE
#######################################
main() {
    if [[ "${QUIET}" != "--quiet" ]]; then
        echo "=========================================="
        echo "IGNY8 Health Check"
        date '+%Y-%m-%d %H:%M:%S'
        echo "=========================================="
        echo ""
    fi

    # The check_* helpers return non-zero when a check fails, and the script
    # runs under `set -e`. Each call is therefore guarded with `|| true` so one
    # failing check cannot abort the run before the remaining checks and the
    # summary are produced; the overall result is carried by EXIT_CODE.
    # Section separators go through output() so quiet mode stays silent.

    # Infrastructure
    output "INFRASTRUCTURE:"
    check_postgres || true
    check_redis || true
    check_disk "/data" || true
    output ""

    # Production containers
    output "PRODUCTION CONTAINERS:"
    check_container "igny8_backend" || true
    check_container "igny8_frontend" || true
    check_container "igny8_celery_worker" || true
    check_container "igny8_celery_beat" || true
    output ""

    # Production endpoints
    output "PRODUCTION ENDPOINTS:"
    check_endpoint "API Status" "http://localhost:8011/api/v1/system/status/" 5 || true
    check_endpoint "Frontend" "http://localhost:8021" 5 || true
    output ""

    # Celery
    output "BACKGROUND TASKS:"
    check_celery_queue || true
    output ""

    # Staging (only checked when the staging backend container is listed)
    if docker ps --format '{{.Names}}' | grep -q "igny8_staging_backend"; then
        output "STAGING CONTAINERS:"
        check_container "igny8_staging_backend" || true
        check_container "igny8_staging_frontend" || true
        output ""

        output "STAGING ENDPOINTS:"
        check_endpoint "Staging API" "http://localhost:8012/api/v1/system/status/" 5 || true
        check_endpoint "Staging Frontend" "http://localhost:8024" 5 || true
        output ""
    fi

    # Summary
    if [[ "${QUIET}" != "--quiet" ]]; then
        echo "=========================================="
        if [[ ${EXIT_CODE} -eq 0 ]]; then
            echo -e "${GREEN}All checks passed${NC}"
        else
            echo -e "${RED}Some checks failed${NC}"
        fi
        echo "=========================================="
    fi

    exit "${EXIT_CODE}"
}
|
||||
|
||||
# Entry point: run the health check, forwarding the script's arguments to main.
main "$@"
|
||||
Reference in New Issue
Block a user