AI Automation, Scheduling and Publishing: front-end and back-end refactor

This commit is contained in:
IGNY8 VPS (Salman)
2026-01-17 15:52:46 +00:00
parent 0435a5cf70
commit d3b3e1c0d4
34 changed files with 4715 additions and 375 deletions

View File

@@ -49,9 +49,9 @@ def check_scheduled_automations():
logger.info(f"[AutomationTask] Skipping site {config.site.id} - already ran today")
continue
# Check if already running
if AutomationRun.objects.filter(site=config.site, status='running').exists():
logger.info(f"[AutomationTask] Skipping site {config.site.id} - already running")
# Check if already running OR paused (don't start new if existing in progress)
if AutomationRun.objects.filter(site=config.site, status__in=['running', 'paused']).exists():
logger.info(f"[AutomationTask] Skipping site {config.site.id} - automation in progress (running/paused)")
continue
logger.info(f"[AutomationTask] Starting scheduled automation for site {config.site.id}")
@@ -162,13 +162,50 @@ def run_automation_task(self, run_id: str):
@shared_task(name='automation.resume_automation_task', bind=True, max_retries=0)
def resume_automation_task(self, run_id: str):
"""
Resume paused automation run from current stage
Resume paused automation run from current stage.
CRITICAL FIXES:
- Verifies run status is 'running' before processing
- Reacquires lock in case it expired during long pause
- Checks pause/cancel status after each stage
- Releases lock on failure
"""
logger.info(f"[AutomationTask] Resuming automation run: {run_id}")
try:
from django.core.cache import cache
# Load run and verify status
run = AutomationRun.objects.get(run_id=run_id)
# CRITICAL FIX: Verify run is actually in 'running' status
# (status is set to 'running' by views.resume before calling this task)
if run.status != 'running':
logger.warning(f"[AutomationTask] Run {run_id} status is '{run.status}', not 'running'. Aborting resume.")
return
# CRITICAL FIX: Reacquire lock in case it expired during long pause (6hr timeout)
lock_key = f'automation_lock_{run.site.id}'
lock_acquired = cache.add(lock_key, run_id, timeout=21600) # 6 hours
if not lock_acquired:
# Lock exists - check if it's ours (from original run start)
existing_lock = cache.get(lock_key)
# If lock exists but isn't our run_id, another run may have started
if existing_lock and existing_lock != run_id and existing_lock != 'locked':
logger.warning(f"[AutomationTask] Lock held by different run ({existing_lock}). Aborting resume for {run_id}")
run.status = 'failed'
run.error_message = f'Lock acquired by another run ({existing_lock}) during pause'
run.completed_at = timezone.now()
run.save()
return
# Lock exists and is either 'locked' (our old format) or our run_id - proceed
logger.info(f"[AutomationTask] Existing lock found, proceeding with resume")
else:
# We acquired a new lock (old one expired)
logger.info(f"[AutomationTask] Reacquired lock after expiry for run {run_id}")
service = AutomationService.from_run_id(run_id)
run = service.run
config = service.config
# Continue from current stage
@@ -196,20 +233,35 @@ def resume_automation_task(self, run_id: str):
for stage in range(run.current_stage - 1, 7):
if stage_enabled[stage]:
stage_methods[stage]()
# CRITICAL FIX: Check for pause/cancel AFTER each stage (same as run_automation_task)
service.run.refresh_from_db()
if service.run.status in ['paused', 'cancelled']:
logger.info(f"[AutomationTask] Resumed automation {service.run.status} after stage {stage + 1}")
return
else:
logger.info(f"[AutomationTask] Stage {stage + 1} is disabled, skipping")
logger.info(f"[AutomationTask] Resumed automation run: {run_id}")
logger.info(f"[AutomationTask] Resumed automation completed: {run_id}")
except Exception as e:
logger.error(f"[AutomationTask] Failed to resume automation run {run_id}: {e}")
# Mark as failed
run = AutomationRun.objects.get(run_id=run_id)
run.status = 'failed'
run.error_message = str(e)
run.completed_at = timezone.now()
run.save()
# Mark as failed and release lock
try:
run = AutomationRun.objects.get(run_id=run_id)
run.status = 'failed'
run.error_message = str(e)
run.completed_at = timezone.now()
run.save()
# Release lock on failure
from django.core.cache import cache
cache.delete(f'automation_lock_{run.site.id}')
except Exception as cleanup_err:
logger.error(f"[AutomationTask] Failed to cleanup after resume failure: {cleanup_err}")
raise
# Alias for continue_automation_task (same as resume)