#!/bin/bash
# =============================================================================
# IGNY8 Health Check Script
# =============================================================================
# Checks the health of all services and reports their status.
# Usage: ./health-check.sh [--quiet]
# Exit status: 0 if every check passed, 1 if at least one check failed.
# =============================================================================

# Abort on unexpected command failures. The check_* helpers report failure
# through their return status, so a bare call to one that returns 1 also
# triggers this exit unless the caller guards it (`if ...` or `|| true`).
set -e

# First command-line argument; the literal "--quiet" suppresses the report.
# The ":-" default keeps the expansion valid when no argument is given.
QUIET="${1:-}"
# Overall result of the run: set to 1 by check_failed, used as exit status.
EXIT_CODE=0

# Colors (ANSI escape sequences, expanded when the message is printed)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'

# Output functions

#######################################
# Print one message line to stdout unless quiet mode is active.
# Globals:   QUIET (read) - the literal "--quiet" suppresses the message
# Arguments: $1 - message; backslash escapes (e.g. ANSI colors) are expanded
# Returns:   0
#######################################
output() {
  if [[ "${QUIET:-}" != "--quiet" ]]; then
    # %b expands backslash escapes the same way `echo -e` does, but a message
    # such as "-n" cannot be mistaken for an option.
    printf '%b\n' "${1:-}"
  fi
}

#######################################
# Report a successful check as a green line prefixed with a check mark.
# Globals:   GREEN, NC (read)
# Arguments: $1 - description of the check result
# Outputs:   one line via output (nothing in quiet mode)
#######################################
check_passed() {
  output "${GREEN}✅ $1${NC}"
}

#######################################
# Report a failed check as a red line and mark the whole run as failed.
# Globals:   RED, NC (read); EXIT_CODE (set to 1)
# Arguments: $1 - description of the failure
# Outputs:   one line via output (nothing in quiet mode)
# Returns:   0 - the failure is recorded in EXIT_CODE, not in the status
#######################################
check_failed() {
  output "${RED}❌ $1${NC}"
  EXIT_CODE=1
}

#######################################
# Report a non-fatal warning as a yellow line; EXIT_CODE is left untouched.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - description of the warning
# Outputs:   one line via output (nothing in quiet mode)
#######################################
check_warn() {
  output "${YELLOW}⚠️ $1${NC}"
}

# Check HTTP endpoint

#######################################
# Probe a URL with curl and report whether it answered with the expected
# HTTP status.
# Globals:   EXIT_CODE (set to 1 through check_failed on a mismatch)
# Arguments: $1 - display name used in the report
#            $2 - URL to request
#            $3 - timeout in seconds (default: 10)
#            $4 - expected HTTP status code (default: 200)
# Returns:   0 when the status matches, 1 otherwise
#######################################
check_endpoint() {
  local name="$1"
  local url="$2"
  local timeout="${3:-10}"
  local expected="${4:-200}"
  local response http_code time_total

  # curl still prints its --write-out report when the transfer fails (with
  # status code 000), so its exit status is deliberately ignored. Appending a
  # fallback with `|| echo` would glue a second record onto that report.
  response=$(curl -s -o /dev/null -w '%{http_code}|%{time_total}' \
    --max-time "${timeout}" "${url}" 2>/dev/null) || true
  # Only substitute the placeholder when curl produced no report at all.
  response="${response:-000|0}"

  http_code="${response%%|*}"
  time_total="${response#*|}"

  if [[ "${http_code}" == "${expected}" ]]; then
    check_passed "${name}: OK (${time_total}s)"
    return 0
  else
    check_failed "${name}: FAILED (HTTP ${http_code})"
    return 1
  fi
}

# Check container status

#######################################
# Report whether a Docker container is running and, when it defines a
# healthcheck, what its health status is.
# Globals:   EXIT_CODE (set to 1 through check_failed when not running)
# Arguments: $1 - exact container name
# Returns:   0 when the container is listed by `docker ps`, 1 otherwise
#######################################
check_container() {
  local name="$1"
  local status

  # -F -x: compare the name as a literal whole line, so characters such as
  # "." in a container name are not treated as regex wildcards.
  if docker ps --format '{{.Names}}' | grep -Fxq -- "${name}"; then
    # The template prints "none" for containers without a healthcheck instead
    # of relying on `docker inspect` failing on the missing Health field.
    status=$(docker inspect \
      --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' \
      "${name}" 2>/dev/null) || status="none"
    status="${status:-none}"

    if [[ "${status}" == "healthy" ]]; then
      check_passed "Container ${name}: Running (healthy)"
    elif [[ "${status}" == "none" ]]; then
      check_passed "Container ${name}: Running (no healthcheck)"
    else
      # e.g. "starting" or "unhealthy": running, but not confirmed healthy.
      check_warn "Container ${name}: Running (${status})"
    fi
    return 0
  else
    check_failed "Container ${name}: NOT RUNNING"
    return 1
  fi
}

# Check disk space

#######################################
# Report the used-space percentage of the filesystem holding a path.
# Globals:   EXIT_CODE (set to 1 through check_failed at critical usage)
# Arguments: $1 - path on the filesystem to inspect
#            $2 - warning threshold in percent (default: 80)
#            $3 - critical threshold in percent (default: 90)
# Returns:   0; the outcome is reported through check_passed / check_warn /
#            check_failed rather than through the return status
#######################################
check_disk() {
  local path="$1"
  local threshold="${2:-80}"
  local critical="${3:-90}"
  local usage

  # -P forces the POSIX one-line-per-filesystem layout, so a long device name
  # cannot wrap the record and shift the capacity column.
  usage=$(df -P "${path}" 2>/dev/null \
    | awk 'NR == 2 { sub(/%/, "", $5); print $5 }') || true

  # An empty or non-numeric value (df failed, path missing) would otherwise be
  # evaluated as 0 in the comparisons below and be reported as a pass.
  if [[ ! "${usage}" =~ ^[0-9]+$ ]]; then
    check_warn "Disk ${path}: Cannot check"
  elif (( usage < threshold )); then
    check_passed "Disk ${path}: ${usage}% used"
  elif (( usage < critical )); then
    check_warn "Disk ${path}: ${usage}% used (warning)"
  else
    check_failed "Disk ${path}: ${usage}% used (critical)"
  fi
}

# Check Redis connection

#######################################
# Send PING through redis-cli inside the Redis container and report whether
# the reply contains PONG.
# Globals:   EXIT_CODE (set to 1 through check_failed on failure)
# Arguments: $1 - container name (default: redis)
# Returns:   0 when the reply contains PONG, 1 otherwise
#######################################
check_redis() {
  local container="${1:-redis}"

  # stderr is discarded on purpose: a missing container or daemon simply
  # yields no PONG and is reported as "NOT RESPONDING".
  if docker exec "${container}" redis-cli ping 2>/dev/null \
    | grep -q "PONG"; then
    check_passed "Redis: Connected"
    return 0
  else
    check_failed "Redis: NOT RESPONDING"
    return 1
  fi
}

# Check PostgreSQL connection

#######################################
# Run pg_isready inside the PostgreSQL container and report whether the
# server says it is accepting connections.
# Globals:   EXIT_CODE (set to 1 through check_failed on failure)
# Arguments: $1 - container name (default: postgres)
#            $2 - user passed to pg_isready -U (default: igny8)
# Returns:   0 when the output contains "accepting", 1 otherwise
#######################################
check_postgres() {
  local container="${1:-postgres}"
  local user="${2:-igny8}"

  # stderr is discarded on purpose: a missing container or daemon simply
  # yields no "accepting" line and is reported as "NOT READY".
  if docker exec "${container}" pg_isready -U "${user}" 2>/dev/null \
    | grep -q "accepting"; then
    check_passed "PostgreSQL: Accepting connections"
    return 0
  else
    check_failed "PostgreSQL: NOT READY"
    return 1
  fi
}

# Check Celery queue

#######################################
# Report the length of the "celery" list in Redis (read with LLEN through
# redis-cli inside the "redis" container).
#   fewer than 50 tasks  -> passed
#   fewer than 200 tasks -> warning
#   200 tasks or more    -> failed
# Globals:   EXIT_CODE (set to 1 through check_failed at critical length)
# Arguments: none
# Returns:   0; the outcome is reported through check_passed / check_warn /
#            check_failed rather than through the return status
#######################################
check_celery_queue() {
  local queue_length

  queue_length=$(docker exec redis redis-cli llen celery 2>/dev/null) \
    || queue_length="error"

  # Anything that is not a plain non-negative integer (command failure, empty
  # output, a Redis error reply) cannot be compared numerically: an empty
  # value would count as 0 and pass, other text would be an arithmetic error.
  if [[ ! "${queue_length}" =~ ^[0-9]+$ ]]; then
    check_warn "Celery queue: Cannot check"
  elif (( queue_length < 50 )); then
    check_passed "Celery queue: ${queue_length} tasks"
  elif (( queue_length < 200 )); then
    check_warn "Celery queue: ${queue_length} tasks (high)"
  else
    check_failed "Celery queue: ${queue_length} tasks (critical)"
  fi
}

# Main health check

#######################################
# Run every health check in order, print a report, and exit with the overall
# result.
# Globals:   QUIET (read through output), GREEN, RED, NC (read),
#            EXIT_CODE (read; set to 1 by check_failed)
# Arguments: none used
# Outputs:   the report on stdout; nothing is printed here in quiet mode
# Exits:     with ${EXIT_CODE} (0 = all checks passed, 1 = some failed)
#######################################
main() {
  local rule="=========================================="

  # All report lines, blank separators included, go through output so that
  # --quiet silences them too.
  output "${rule}"
  output "IGNY8 Health Check"
  output "$(date '+%Y-%m-%d %H:%M:%S')"
  output "${rule}"
  output ""

  # Every check below is guarded with `|| true`. The check_* helpers return 1
  # on failure, and under `set -e` an unguarded failure would end the script
  # at the first failed check, before the remaining checks and the summary
  # run. The failure itself is already recorded in EXIT_CODE by check_failed.

  # Infrastructure
  output "INFRASTRUCTURE:"
  check_postgres || true
  check_redis || true
  check_disk "/data" || true
  output ""

  # Production containers
  output "PRODUCTION CONTAINERS:"
  check_container "igny8_backend" || true
  check_container "igny8_frontend" || true
  check_container "igny8_celery_worker" || true
  check_container "igny8_celery_beat" || true
  output ""

  # Production endpoints
  output "PRODUCTION ENDPOINTS:"
  check_endpoint "API Status" "http://localhost:8011/api/v1/system/status/" 5 || true
  check_endpoint "Frontend" "http://localhost:8021" 5 || true
  output ""

  # Celery
  output "BACKGROUND TASKS:"
  check_celery_queue || true
  output ""

  # Staging (only when a staging backend container is listed as running)
  if docker ps --format '{{.Names}}' | grep -Fq "igny8_staging_backend"; then
    output "STAGING CONTAINERS:"
    check_container "igny8_staging_backend" || true
    check_container "igny8_staging_frontend" || true
    output ""

    output "STAGING ENDPOINTS:"
    check_endpoint "Staging API" "http://localhost:8012/api/v1/system/status/" 5 || true
    check_endpoint "Staging Frontend" "http://localhost:8024" 5 || true
    output ""
  fi

  # Summary
  output "${rule}"
  if [[ "${EXIT_CODE}" -eq 0 ]]; then
    output "${GREEN}All checks passed${NC}"
  else
    output "${RED}Some checks failed${NC}"
  fi
  output "${rule}"

  exit "${EXIT_CODE}"
}

# Entry point: run the health check with the script's arguments.
main "$@"