Automation revamp part 1

2025-12-28 01:46:27 +00:00
parent 0605f650b1
commit ea9125b805
9 changed files with 1237 additions and 58 deletions
--- a/backend/igny8_core/business/automation/services/automation_service.py
+++ b/backend/igny8_core/business/automation/services/automation_service.py
@@ -109,6 +109,9 @@ class AutomationService:
            # Create run_id and log files
            run_id = self.logger.start_run(self.account.id, self.site.id, trigger_type)
            
+            # Capture initial queue snapshot for accurate progress tracking
+            initial_snapshot = self._capture_initial_snapshot()
+            
            # Create AutomationRun record
            self.run = AutomationRun.objects.create(
                run_id=run_id,
@@ -117,6 +120,7 @@ class AutomationService:
                trigger_type=trigger_type,
                status='running',
                current_stage=1,
+                initial_snapshot=initial_snapshot,
            )
            
            # Log start
@@ -124,6 +128,10 @@ class AutomationService:
                run_id, self.account.id, self.site.id, 0,
                f"Automation started (trigger: {trigger_type})"
            )
+            self.logger.log_stage_progress(
+                run_id, self.account.id, self.site.id, 0,
+                f"Initial snapshot captured: {initial_snapshot['total_initial_items']} total items across all stages"
+            )
            self.logger.log_stage_progress(
                run_id, self.account.id, self.site.id, 0,
                f"Credit check: Account has {self.account.credits} credits, estimated need: {estimated_credits} credits"
@@ -1361,6 +1369,61 @@ class AutomationService:
        logger.info(f"[AutomationService] Estimated credits: {total}")
        return total
    
+    def _capture_initial_snapshot(self) -> dict:
+        """
+        Capture initial queue sizes at run start for accurate progress tracking.
+        This snapshot is used to calculate global progress percentage correctly.
+        """
+        # Stage 1: keywords on this site with status 'new', no cluster assigned, and not disabled
+        stage_1_initial = Keywords.objects.filter(
+            site=self.site, status='new', cluster__isnull=True, disabled=False
+        ).count()
+        
+        # Stage 2: non-disabled clusters with status 'new'; clusters that already have any related idea are excluded
+        stage_2_initial = Clusters.objects.filter(
+            site=self.site, status='new', disabled=False
+        ).exclude(ideas__isnull=False).count()
+        
+        # Stage 3: content ideas on this site with status 'new'
+        stage_3_initial = ContentIdeas.objects.filter(
+            site=self.site, status='new'
+        ).count()
+        
+        # Stage 4: tasks on this site with status 'queued'
+        stage_4_initial = Tasks.objects.filter(
+            site=self.site, status='queued'
+        ).count()
+        
+        # Stage 5: draft content whose related-image count is zero
+        stage_5_initial = Content.objects.filter(
+            site=self.site, status='draft'
+        ).annotate(images_count=Count('images')).filter(images_count=0).count()
+        
+        # Stage 6: images on this site with status 'pending'
+        stage_6_initial = Images.objects.filter(
+            site=self.site, status='pending'
+        ).count()
+        
+        # Stage 7: content on this site with status 'review'
+        stage_7_initial = Content.objects.filter(
+            site=self.site, status='review'
+        ).count()
+        
+        snapshot = {  # one count per stage, plus their sum under 'total_initial_items'
+            'stage_1_initial': stage_1_initial,
+            'stage_2_initial': stage_2_initial,
+            'stage_3_initial': stage_3_initial,
+            'stage_4_initial': stage_4_initial,
+            'stage_5_initial': stage_5_initial,
+            'stage_6_initial': stage_6_initial,
+            'stage_7_initial': stage_7_initial,
+            'total_initial_items': stage_1_initial + stage_2_initial + stage_3_initial + 
+                                   stage_4_initial + stage_5_initial + stage_6_initial + stage_7_initial,
+        }
+        
+        logger.info(f"[AutomationService] Initial snapshot captured: {snapshot}")
+        return snapshot
+    
    # Helper methods
    
    def _wait_for_task(self, task_id: str, stage_number: int, item_name: str, continue_on_error: bool = True):
@@ -1559,7 +1622,7 @@ class AutomationService:
    def _get_stage_3_state(self) -> dict:
        """Get processing state for Stage 3: Ideas → Tasks"""
        queue = ContentIdeas.objects.filter(
-            site=self.site, status='approved'
+            site=self.site, status='new'  # Fixed: Match pipeline_overview status
        ).order_by('id')
        
        processed = self._get_processed_count(3)
@@ -1580,7 +1643,7 @@ class AutomationService:
    def _get_stage_4_state(self) -> dict:
        """Get processing state for Stage 4: Tasks → Content"""
        queue = Tasks.objects.filter(
-            site=self.site, status='ready'
+            site=self.site, status='queued'  # Fixed: Match pipeline_overview status
        ).order_by('id')
        
        processed = self._get_processed_count(4)
@@ -1666,51 +1729,30 @@ class AutomationService:
        }
    
    def _get_processed_count(self, stage: int) -> int:
-        """Get count of items processed in current stage during this run"""
+        """
+        Get accurate processed count from stage result.
+        Uses stage-specific keys for correct counting instead of DB queries.
+        """
        if not self.run:
            return 0
        
-        # Count items that were updated during this run and changed status from pending
-        if stage == 1:
-            # Keywords that changed status from 'new' during this run
-            return Keywords.objects.filter(
-                site=self.site,
-                updated_at__gte=self.run.started_at
-            ).exclude(status='new').count()
-        elif stage == 2:
-            # Clusters that changed status from 'new' during this run
-            return Clusters.objects.filter(
-                site=self.site,
-                updated_at__gte=self.run.started_at
-            ).exclude(status='new').count()
-        elif stage == 3:
-            # Ideas that changed status from 'approved' during this run
-            return ContentIdeas.objects.filter(
-                site=self.site,
-                updated_at__gte=self.run.started_at
-            ).exclude(status='approved').count()
-        elif stage == 4:
-            # Tasks that changed status from 'ready'/'queued' during this run
-            return Tasks.objects.filter(
-                site=self.site,
-                updated_at__gte=self.run.started_at
-            ).exclude(status__in=['ready', 'queued']).count()
-        elif stage == 5:
-            # Content processed for image prompts during this run
-            return Content.objects.filter(
-                site=self.site,
-                updated_at__gte=self.run.started_at,
-                images__isnull=False
-            ).distinct().count()
-        elif stage == 6:
-            # Images completed during this run
-            return Images.objects.filter(
-                site=self.site,
-                updated_at__gte=self.run.started_at,
-                status='completed'
-            ).count()
+        # Read the run's stage_<stage>_result attribute; a missing attribute or an empty/None value counts as 0 processed
+        result = getattr(self.run, f'stage_{stage}_result', None)
+        if not result:
+            return 0
        
-        return 0
+        # Key within the stage result that holds each stage's processed count
+        key_map = {
+            1: 'keywords_processed',
+            2: 'clusters_processed',
+            3: 'ideas_processed',
+            4: 'tasks_processed',
+            5: 'content_processed',
+            6: 'images_processed',
+            7: 'ready_for_review'
+        }
+        
+        return result.get(key_map.get(stage, ''), 0)  # unmapped stage looks up '' and an absent key yields 0
    
    def _get_current_items(self, queryset, count: int) -> list:
        """Get currently processing items"""