# igny8/backend/igny8_core/modules/planner/tasks.py

"""
Celery tasks for Planner module - AI clustering and idea generation
"""
import logging
import time
from typing import List
from django.db import transaction
from igny8_core.modules.planner.models import Keywords, Clusters, ContentIdeas
from igny8_core.utils.ai_processor import ai_processor
from igny8_core.ai.functions.generate_ideas import generate_ideas_core

# Module-level logger shared by every task and helper in this file.
logger = logging.getLogger(__name__)

# Try to import Celery, fall back to synchronous execution if not available
try:
    from celery import shared_task
    CELERY_AVAILABLE = True
except ImportError:
    CELERY_AVAILABLE = False

    def shared_task(*args, **kwargs):
        """
        No-op stand-in for ``celery.shared_task`` used when Celery is not installed.

        Supports both decorator spellings:

        * ``@shared_task`` (bare) - the decorated function is returned unchanged.
        * ``@shared_task(bind=True, ...)`` (called) - the options are ignored and
          a pass-through decorator is returned.

        NOTE: options such as ``bind=True`` are NOT emulated; a function declared
        with a leading ``self`` parameter still needs that argument supplied by
        the caller when it is invoked synchronously.
        """
        # Bare usage: the only positional argument is the function being decorated.
        if len(args) == 1 and not kwargs and callable(args[0]):
            return args[0]

        def decorator(func):
            return func
        return decorator


# Value stored under 'functionName' in every step entry produced by the clustering core.
_CLUSTER_STEP_FUNCTION = '_auto_cluster_keywords_core'


def _make_step(number, name, status, message, started_at=None, error=None, duration=None):
    """
    Build one entry for the request/response step logs.

    Args:
        number: Value for 'stepNumber'.
        name: Value for 'stepName'.
        status: 'success' or 'error'.
        message: Human-readable description of the outcome.
        started_at: ``time.time()`` value taken when the step began; used to
            compute the duration in milliseconds when ``duration`` is not given.
        error: Optional error text; the 'error' key is only present when set.
        duration: Explicit duration in milliseconds (overrides ``started_at``).

    Returns:
        dict with keys stepNumber, stepName, functionName, status, message,
        optionally error, and duration.
    """
    step = {
        'stepNumber': number,
        'stepName': name,
        'functionName': _CLUSTER_STEP_FUNCTION,
        'status': status,
        'message': message,
    }
    if error is not None:
        step['error'] = error
    if duration is None:
        duration = int((time.time() - started_at) * 1000)
    step['duration'] = duration
    return step


def _log_missing_openai_settings(account):
    """Log diagnostics about why no OpenAI API key is available for ``account``; never raises."""
    try:
        from igny8_core.modules.system.models import IntegrationSettings
        settings_obj = IntegrationSettings.objects.filter(
            integration_type='openai',
            account=account,
            is_active=True
        ).first()
        if settings_obj:
            # Only the config key NAMES are logged, never the values.
            logger.error(f"IntegrationSettings found but API key missing. Config keys: {list(settings_obj.config.keys()) if settings_obj.config else 'None'}")
        else:
            logger.error(f"No IntegrationSettings found for account {account.id if account else None}, integration_type='openai', is_active=True")
    except Exception as debug_error:
        logger.error(f"Error checking IntegrationSettings: {str(debug_error)}", exc_info=True)


def _match_cluster_keywords(cluster_keywords, available_by_normalized):
    """
    Match keyword strings returned by the AI against the loaded keyword objects.

    Matching is exact after stripping whitespace and lower-casing. Entries that
    are not strings are ignored instead of aborting the whole clustering run.

    Args:
        cluster_keywords: Iterable of keyword strings for one cluster.
        available_by_normalized: Mapping of normalized keyword text -> keyword object.

    Returns:
        (matched_objects, unmatched_strings) - unmatched strings are stripped
        but keep their original casing, for logging.
    """
    wanted = {}
    for raw in cluster_keywords:
        if not isinstance(raw, str):
            continue
        stripped = raw.strip()
        wanted[stripped.lower()] = stripped  # Keep original for logging

    matched = []
    unmatched = []
    for normalized, original in wanted.items():
        if normalized in available_by_normalized:
            matched.append(available_by_normalized[normalized])
        else:
            unmatched.append(original)
    return matched, unmatched


def _auto_cluster_keywords_core(keyword_ids: List[int], sector_id: int = None, account_id: int = None, progress_callback=None):
    """
    Core logic for clustering keywords. Can be called with or without Celery.

    Loads the requested keywords, resolves the account/site/sector from the
    first keyword, asks ``AIProcessor.cluster_keywords`` for clusters, then -
    inside one database transaction - creates the clusters, assigns matched
    keywords to them (status 'mapped') and recalculates cluster metrics.

    Every stage is recorded as a step entry: stages before the AI call go into
    ``request_steps`` (steps 4-7), stages after it into ``response_steps``
    (steps 13-16). ``response_steps`` is also handed to the AI processor
    (presumably it records the intermediate steps there - confirm against
    AIProcessor).

    Args:
        keyword_ids: List of keyword IDs to cluster
        sector_id: Sector ID for the keywords (optional extra filter)
        account_id: Account ID for account isolation (optional extra filter)
        progress_callback: Optional callable invoked as
            ``progress_callback(state=..., meta=...)`` for progress updates
            (for Celery tasks). ``state`` is 'PROGRESS', 'SUCCESS' or 'FAILURE'.

    Returns:
        dict - on success: ``success=True`` plus ``clusters_created``,
        ``keywords_updated``, ``message``, ``request_steps``, ``response_steps``;
        on failure: ``success=False`` plus ``error``, ``request_steps``,
        ``response_steps``. Exceptions raised inside the workflow are caught
        and reported as a failure result; only an exception raised by
        ``progress_callback`` while reporting that failure can propagate.
    """
    # Track request and response steps
    request_steps = []
    response_steps = []

    def report(state='PROGRESS', **meta):
        """Forward ``meta`` plus both step logs to the progress callback, if one was given."""
        if progress_callback:
            meta['request_steps'] = request_steps
            meta['response_steps'] = response_steps
            progress_callback(state=state, meta=meta)

    def failure(error):
        """Build the standard failure result."""
        return {'success': False, 'error': error, 'request_steps': request_steps, 'response_steps': response_steps}

    def fail_step(number, name, started_at, message, error, result_error):
        """Record a failed request step, publish the step logs and return the failure result."""
        request_steps.append(_make_step(number, name, 'error', message, started_at, error=error))
        report()
        return failure(result_error)

    try:
        # Initialize progress if callback provided
        if progress_callback:
            report(
                current=0,
                total=len(keyword_ids),
                percentage=0,
                message='Initializing keyword clustering...',
                phase='initializing',
            )

        # Step 4: Keyword Loading & Validation
        step_start = time.time()
        keywords_queryset = Keywords.objects.filter(id__in=keyword_ids)
        if account_id:
            keywords_queryset = keywords_queryset.filter(account_id=account_id)
        if sector_id:
            keywords_queryset = keywords_queryset.filter(sector_id=sector_id)

        # select_related so the relationship checks below do not trigger extra queries
        keywords = list(keywords_queryset.select_related('account', 'site', 'site__account', 'sector', 'sector__site'))

        if not keywords:
            logger.warning(f"No keywords found for clustering: {keyword_ids}")
            return fail_step(
                4, 'Keyword Loading & Validation', step_start,
                'No keywords found', 'No keywords found', 'No keywords found',
            )

        request_steps.append(_make_step(
            4, 'Keyword Loading & Validation', 'success',
            f'Loaded {len(keywords)} keywords', step_start,
        ))

        total_keywords = len(keywords)

        # Step 5: Relationship Validation
        # The account/site/sector of the FIRST keyword are used for the whole batch.
        step_start = time.time()
        try:
            first_keyword = keywords[0]
            account = getattr(first_keyword, 'account', None)
            site = getattr(first_keyword, 'site', None)

            # If account is None, try to get it from site
            if not account and site:
                try:
                    account = getattr(site, 'account', None)
                except Exception:
                    # Best effort: a missing account is reported by the check below.
                    logger.debug("Could not resolve account from site", exc_info=True)

            sector = getattr(first_keyword, 'sector', None)

            # If site is None, try to get it from sector
            if not site and sector:
                try:
                    site = getattr(sector, 'site', None)
                except Exception:
                    # Best effort: a missing site is reported by the check below.
                    logger.debug("Could not resolve site from sector", exc_info=True)

        except Exception as e:
            logger.error(f"Error accessing keyword relationships: {str(e)}")
            return fail_step(
                5, 'Relationship Validation', step_start,
                f'Error accessing relationships: {str(e)}', str(e),
                f'Invalid keyword data: {str(e)}',
            )

        if not account:
            logger.error(f"No account found for keywords: {keyword_ids}. Keyword site: {getattr(first_keyword, 'site', None)}, Keyword account: {getattr(first_keyword, 'account', None)}")
            return fail_step(
                5, 'Relationship Validation', step_start,
                'No account found', 'No account found for keywords',
                'No account found for keywords. Please ensure keywords are properly associated with a site and account.',
            )

        if not site:
            logger.error(f"No site found for keywords: {keyword_ids}. Keyword site: {getattr(first_keyword, 'site', None)}, Sector site: {getattr(sector, 'site', None) if sector else None}")
            return fail_step(
                5, 'Relationship Validation', step_start,
                'No site found', 'No site found for keywords',
                'No site found for keywords. Please ensure keywords are properly associated with a site.',
            )

        request_steps.append(_make_step(
            5, 'Relationship Validation', 'success',
            f'Account: {account.id if account else None}, Site: {site.id if site else None}, Sector: {sector.id if sector else None}',
            step_start,
        ))

        # Update progress: Analyzing keywords (0-40%)
        report(
            current=0,
            total=total_keywords,
            percentage=5,
            message=f'Preparing to analyze {total_keywords} keywords...',
            phase='preparing',
        )

        # Get sector name if available
        sector_name = sector.name if sector else None

        # Format keywords for AI
        keyword_data = [
            {
                'keyword': kw.keyword,
                'volume': kw.volume,
                'difficulty': kw.difficulty,
                'intent': kw.intent,
            }
            for kw in keywords
        ]

        # Update progress: Sending to AI (10-40%)
        report(
            current=0,
            total=total_keywords,
            percentage=10,
            message='Analyzing keyword relationships with AI...',
            phase='analyzing',
        )

        # Step 6: AIProcessor Creation
        step_start = time.time()
        from igny8_core.utils.ai_processor import AIProcessor
        try:
            # Separate local name so the ``account_id`` filter argument is not shadowed.
            resolved_account_id = account.id if account else None
            account_name = account.name if account else None
            logger.info(f"Creating AIProcessor with account: id={resolved_account_id}, name={account_name}")

            processor = AIProcessor(account=account)

            # Log only WHETHER a key is configured - never any part of the key itself.
            has_api_key = bool(processor.openai_api_key)
            logger.info(f"AIProcessor created. Has API key: {has_api_key}, Model: {processor.default_model}")

            request_steps.append(_make_step(
                6, 'AIProcessor Creation', 'success',
                f'AIProcessor created with account context (Account ID: {resolved_account_id}, Has API Key: {has_api_key})',
                step_start,
            ))
        except Exception as e:
            logger.error(f"Error creating AIProcessor: {type(e).__name__}: {str(e)}", exc_info=True)
            return fail_step(
                6, 'AIProcessor Creation', step_start,
                f'Error creating AIProcessor: {str(e)}', str(e),
                f'Error creating AIProcessor: {str(e)}',
            )

        # Step 7: AI Call Preparation
        step_start = time.time()
        try:
            # Check if API key is available
            if not processor.openai_api_key:
                logger.error(f"OpenAI API key not found for account {account.id if account else None}")
                # Check IntegrationSettings directly to explain why the key is missing
                _log_missing_openai_settings(account)
                return fail_step(
                    7, 'AI Call Preparation', step_start,
                    'OpenAI API key not configured', 'OpenAI API key not configured',
                    'OpenAI API key not configured',
                )

            request_steps.append(_make_step(
                7, 'AI Call Preparation', 'success',
                f'Prepared {len(keyword_data)} keywords for AI analysis', step_start,
            ))
        except Exception as e:
            return fail_step(
                7, 'AI Call Preparation', step_start,
                f'Error preparing AI call: {str(e)}', str(e),
                f'Error preparing AI call: {str(e)}',
            )

        # Call AI with step tracking
        result = processor.cluster_keywords(keyword_data, sector_name=sector_name, account=account, response_steps=response_steps, progress_callback=progress_callback)

        if result.get('error'):
            logger.error(f"AI clustering error: {result['error']}")
            report(state='FAILURE', error=result['error'], message=f"Error: {result['error']}")
            return failure(result['error'])

        # Merge response steps returned by the processor. The processor was also
        # given ``response_steps`` itself, so skip entries that are already in the
        # list (same list returned, or a shallow copy) to avoid duplicating them.
        returned_steps = result.get('response_steps')
        if returned_steps and returned_steps is not response_steps:
            already_recorded = {id(step) for step in response_steps}
            response_steps.extend(
                [step for step in returned_steps if id(step) not in already_recorded]
            )

        # Update progress: Creating clusters (40-90%)
        clusters_data = result.get('clusters', [])
        report(
            current=0,
            total=total_keywords,
            percentage=40,
            message=f'Creating {len(clusters_data)} clusters...',
            phase='creating_clusters',
        )

        clusters_created = 0
        keywords_updated = 0

        # Normalized (stripped, lower-cased) keyword text -> loaded keyword object.
        # Built once: it does not depend on the cluster being processed.
        available_keywords_normalized = {
            (kw_obj.keyword or '').strip().lower(): kw_obj
            for kw_obj in keywords
        }

        # Step 13: Database Transaction Start
        step_start = time.time()
        # Create/update clusters and assign keywords
        # Note: account and sector are already extracted above to avoid database queries inside transaction
        with transaction.atomic():
            response_steps.append(_make_step(
                13, 'Database Transaction Start', 'success', 'Transaction started', step_start,
            ))

            # Step 14: Cluster Creation/Update
            cluster_step_start = time.time()
            total_clusters = len(clusters_data)
            for idx, cluster_data in enumerate(clusters_data):
                cluster_name = cluster_data.get('name', '')
                cluster_keywords = cluster_data.get('keywords', [])

                # Skip clusters the AI returned without a name or without keywords
                if not cluster_name or not cluster_keywords:
                    continue

                # Update progress for each cluster
                report(
                    current=idx + 1,
                    total=total_clusters,
                    percentage=40 + int((idx / total_clusters) * 50),
                    message=f"Creating cluster '{cluster_name}' ({idx + 1} of {total_clusters})...",
                    phase='creating_clusters',
                    current_item=cluster_name,
                )

                # Get or create cluster
                # Note: ``site`` was validated in step 5, so it is always set here.
                lookup = {'name': cluster_name, 'account': account, 'site': site}
                defaults = {
                    'description': cluster_data.get('description', ''),
                    'status': 'active',
                }
                if sector:
                    lookup['sector'] = sector
                else:
                    # If no sector, look up clusters without a sector but still require site
                    lookup['sector__isnull'] = True
                    defaults['sector'] = None

                cluster, created = Clusters.objects.get_or_create(defaults=defaults, **lookup)

                if created:
                    clusters_created += 1

                # Step 15: Keyword Matching & Assignment
                # Match keywords by keyword string (case-insensitive) from the already-loaded keywords list
                matched_keyword_objects, unmatched_keywords = _match_cluster_keywords(
                    cluster_keywords, available_keywords_normalized
                )

                # Log unmatched keywords for debugging
                if unmatched_keywords:
                    logger.warning(
                        f"Some keywords in cluster '{cluster_name}' were not matched: {unmatched_keywords}. "
                        f"Available keywords: {[kw.keyword for kw in keywords]}"
                    )

                # Update matched keywords
                if matched_keyword_objects:
                    matched_ids = [kw.id for kw in matched_keyword_objects]
                    # Rebuild queryset inside transaction to avoid database connection issues
                    keyword_filter = Keywords.objects.filter(
                        id__in=matched_ids,
                        account=account
                    )
                    # Handle sector=None case
                    if sector:
                        keyword_filter = keyword_filter.filter(sector=sector)
                    else:
                        keyword_filter = keyword_filter.filter(sector__isnull=True)

                    keywords_updated += keyword_filter.update(
                        cluster=cluster,
                        status='mapped'  # Update status from pending to mapped
                    )

            # Log steps 14 and 15 after all clusters are processed
            response_steps.append(_make_step(
                14, 'Cluster Creation/Update', 'success',
                f'Created/updated {clusters_created} clusters', cluster_step_start,
            ))
            response_steps.append(_make_step(
                15, 'Keyword Matching & Assignment', 'success',
                f'Assigned {keywords_updated} keywords to clusters',
                duration=0,  # Duration already included in step 14
            ))

            # Step 16: Metrics Recalculation & Commit
            step_start = time.time()
            # Update progress: Recalculating metrics (90-95%)
            report(
                current=clusters_created,
                total=clusters_created,
                percentage=90,
                message='Recalculating cluster metrics...',
                phase='finalizing',
            )

            # Recalculate cluster metrics for every cluster of this account/sector
            from django.db.models import Sum
            cluster_filter = Clusters.objects.filter(account=account)
            if sector:
                cluster_filter = cluster_filter.filter(sector=sector)
            else:
                cluster_filter = cluster_filter.filter(sector__isnull=True)

            for existing_cluster in cluster_filter:
                cluster_keyword_rows = Keywords.objects.filter(cluster=existing_cluster)
                existing_cluster.keywords_count = cluster_keyword_rows.count()
                volume_sum = cluster_keyword_rows.aggregate(total=Sum('volume'))['total']
                # Sum() yields None when the cluster has no keywords
                existing_cluster.volume = volume_sum or 0
                existing_cluster.save()

            # Transaction commits when the ``with`` block exits
            response_steps.append(_make_step(
                16, 'Metrics Recalculation & Commit', 'success',
                f'Recalculated metrics for {cluster_filter.count()} clusters, transaction committed',
                step_start,
            ))

        # Final progress update
        final_message = f"Clustering complete: {clusters_created} clusters created, {keywords_updated} keywords updated"
        logger.info(final_message)

        report(state='SUCCESS', message=final_message)

        return {
            'success': True,
            'clusters_created': clusters_created,
            'keywords_updated': keywords_updated,
            'message': final_message,
            'request_steps': request_steps,
            'response_steps': response_steps,
        }

    except Exception as e:
        logger.error(f"Error in auto_cluster_keywords_core: {str(e)}", exc_info=True)
        report(state='FAILURE', error=str(e), message=f'Error: {str(e)}')
        return failure(str(e))


@shared_task(bind=True, max_retries=3)
def auto_cluster_keywords_task(self, keyword_ids: List[int], sector_id: int = None, account_id: int = None):
    """
    Celery entry point for AI keyword clustering.

    Delegates the work to ``_auto_cluster_keywords_core`` and relays its
    progress updates to Celery through ``self.update_state``.

    Args:
        keyword_ids: List of keyword IDs to cluster
        sector_id: Sector ID for the keywords
        account_id: Account ID for account isolation

    Returns:
        The result dict of ``_auto_cluster_keywords_core``. If that call raises,
        a serializable error dict (error, error_type, error_class, message, the
        last seen step logs, task_id and the task arguments) is returned instead.
    """
    separator = "=" * 80

    def context_lines():
        # Formatted task arguments, shared by the start banner and the failure banner.
        return [
            f"  - keyword_ids: {keyword_ids}",
            f"  - sector_id: {sector_id}",
            f"  - account_id: {account_id}",
        ]

    logger.info(separator)
    logger.info("auto_cluster_keywords_task STARTED")
    logger.info(f"  - Task ID: {self.request.id}")
    for line in context_lines():
        logger.info(line)
    logger.info(separator)

    # Most recent step logs seen in a progress update, kept for error reporting.
    latest_steps = {'request_steps': [], 'response_steps': []}

    def progress_callback(state, meta):
        # Remember the step logs carried by this update, then forward it to Celery.
        if isinstance(meta, dict):
            for key in ('request_steps', 'response_steps'):
                if key in meta:
                    latest_steps[key] = meta[key]
        self.update_state(state=state, meta=meta)

    try:
        result = _auto_cluster_keywords_core(keyword_ids, sector_id, account_id, progress_callback)
    except Exception as e:
        error_type = type(e).__name__
        error_msg = str(e)

        # Log full error details; the traceback is attached to the closing separator.
        logger.error(separator)
        logger.error(f"auto_cluster_keywords_task FAILED: {error_type}: {error_msg}")
        logger.error(f"  - Task ID: {self.request.id}")
        for line in context_lines():
            logger.error(line)
        logger.error(separator, exc_info=True)

        # Plain-data error payload that Celery can serialize
        error_dict = {
            'error': error_msg,
            'error_type': error_type,
            'error_class': error_type,
            'message': f'{error_type}: {error_msg}',
            'request_steps': latest_steps['request_steps'],
            'response_steps': latest_steps['response_steps'],
            'task_id': str(self.request.id),
            'keyword_ids': keyword_ids,
            'sector_id': sector_id,
            'account_id': account_id
        }

        # Publish the failure; a failing state update is logged but must not mask the error.
        try:
            self.update_state(state='FAILURE', meta=error_dict)
        except Exception as update_error:
            logger.error(f"Failed to update task state: {str(update_error)}")

        return error_dict

    logger.info(f"auto_cluster_keywords_task COMPLETED: {result}")
    return result


def _resolve_idea_cluster(idea_data, idx, clusters):
    """
    Pick the cluster an AI-generated idea should be attached to.

    Matching is attempted in this order:
      1. the ``cluster_id`` echoed back by the AI,
      2. the ``cluster_name`` echoed back by the AI (exact match),
      3. positional fallback: ``idx`` modulo the number of clusters.

    Args:
        idea_data: One idea dict from the AI response
        idx: Zero-based position of the idea in the AI response
        clusters: Loaded cluster objects the ideas were requested for

    Returns:
        The matched cluster, or None when ``clusters`` is empty.
    """
    if not clusters:
        return None

    cluster_name = idea_data.get('cluster_name', '')
    cluster_id_from_ai = idea_data.get('cluster_id')
    logger.info(f"  - Looking for cluster: name='{cluster_name}', id_from_ai={cluster_id_from_ai}")

    # Try the ID first. The ID comes back inside the AI response, so it may be
    # a string ("12") rather than an int; accept either form so such ideas are
    # not pushed down to the name/positional fallbacks.
    if cluster_id_from_ai:
        id_text = str(cluster_id_from_ai).strip()
        for c in clusters:
            if c.id == cluster_id_from_ai or str(c.id) == id_text:
                logger.info(f"  - Found cluster by ID: {c.id} - {c.name}")
                return c

    # Fallback to name matching if ID didn't work
    if cluster_name:
        for c in clusters:
            if c.name == cluster_name:
                logger.info(f"  - Found cluster by name: {c.id} - {c.name}")
                return c

    # Last resort: distribute ideas across clusters by position (round-robin)
    cluster_index = idx % len(clusters)
    cluster = clusters[cluster_index]
    logger.info(f"  - Cluster not found by name/ID, using cluster at index {cluster_index}: {cluster.id} - {cluster.name}")
    return cluster


@shared_task(bind=True, max_retries=3)
def auto_generate_ideas_task(self, cluster_ids: List[int], account_id: int = None):
    """
    Celery task to generate content ideas for clusters using AI.

    Loads the requested clusters (optionally restricted to ``account_id``),
    sends their names, descriptions and keywords to
    ``AIProcessor.generate_ideas`` and stores every returned idea as a
    ContentIdeas row inside a single transaction. Progress is reported
    through ``self.update_state`` along the way. Ideas whose cluster has no
    site (neither on the cluster nor on its sector) are skipped.

    Args:
        cluster_ids: List of cluster IDs
        account_id: Account ID for account isolation

    Returns:
        dict: ``{'success': True, 'ideas_created': int, 'message': str}`` on
        success, or ``{'success': False, 'error': str}`` when no clusters
        match or the AI reports an error.

    Raises:
        Exception: Any unexpected error is logged, reported as a FAILURE
            state (best effort) and re-raised.
    """
    # json is only needed by this task, so it is imported at function scope
    import json

    logger.info("=" * 80)
    logger.info("auto_generate_ideas_task STARTED")
    logger.info(f"  - cluster_ids: {cluster_ids}")
    logger.info(f"  - account_id: {account_id}")
    logger.info("=" * 80)

    try:
        from django.db import connection

        # Log database connection status
        try:
            connection.ensure_connection()
            logger.info("Database connection: OK")
        except Exception as db_error:
            logger.error(f"Database connection error: {type(db_error).__name__}: {str(db_error)}")
            raise

        # Initialize progress
        logger.info("Initializing task progress state...")
        self.update_state(
            state='PROGRESS',
            meta={
                'current': 0,
                'total': len(cluster_ids),
                'percentage': 0,
                'message': 'Initializing content ideas generation...',
                'phase': 'initializing'
            }
        )

        # Get clusters with keywords and relationships (including site)
        logger.info(f"Querying clusters with IDs: {cluster_ids}")
        try:
            clusters_queryset = Clusters.objects.filter(id__in=cluster_ids)
            logger.info(f"Initial queryset count: {clusters_queryset.count()}")

            if account_id:
                clusters_queryset = clusters_queryset.filter(account_id=account_id)
                logger.info(f"After account filter count: {clusters_queryset.count()}")

            logger.info("Loading clusters with select_related...")
            clusters = list(clusters_queryset.select_related('sector', 'account', 'site', 'sector__site'))
            logger.info(f"Successfully loaded {len(clusters)} clusters")

            # Log each cluster's details
            for c in clusters:
                account = getattr(c, 'account', None)
                logger.info(f"  Cluster {c.id}: name='{c.name}', account_id={account.id if account else 'None'}, site_id={c.site_id if c.site else 'None'}, sector_id={c.sector_id if c.sector else 'None'}")
        except Exception as query_error:
            logger.error(f"Error querying clusters: {type(query_error).__name__}: {str(query_error)}", exc_info=True)
            raise

        if not clusters:
            logger.warning(f"No clusters found: {cluster_ids}")
            return {'success': False, 'error': 'No clusters found'}

        total_clusters = len(clusters)

        # Update progress: Preparing clusters (0-10%)
        self.update_state(
            state='PROGRESS',
            meta={
                'current': 0,
                'total': total_clusters,
                'percentage': 5,
                'message': f'Preparing {total_clusters} clusters for idea generation...',
                'phase': 'preparing'
            }
        )

        # Load the keywords of every cluster with one query (instead of one
        # query per cluster) and group them by owning cluster.
        keywords_by_cluster = {}
        keyword_rows = Keywords.objects.filter(
            cluster_id__in=[c.id for c in clusters]
        ).values_list('cluster_id', 'keyword')
        for owner_id, keyword in keyword_rows:
            keywords_by_cluster.setdefault(owner_id, []).append(keyword)

        # Format cluster data for AI
        cluster_data = []
        for idx, cluster in enumerate(clusters):
            keywords_list = keywords_by_cluster.get(cluster.id, [])

            cluster_item = {
                'id': cluster.id,
                'name': cluster.name,
                'description': cluster.description or '',
                'keywords': keywords_list,
            }
            cluster_data.append(cluster_item)

            # Log cluster data being sent to AI
            logger.info(f"Cluster {idx + 1}/{total_clusters} data for AI:")
            logger.info(f"  - ID: {cluster_item['id']}")
            logger.info(f"  - Name: {cluster_item['name']}")
            logger.info(f"  - Description: {cluster_item['description'][:100] if cluster_item['description'] else '(empty)'}...")
            logger.info(f"  - Keywords count: {len(keywords_list)}")
            logger.info(f"  - Keywords: {keywords_list[:5]}{'...' if len(keywords_list) > 5 else ''}")
            account = getattr(cluster, 'account', None)
            logger.info(f"  - Cluster account: {account.id if account else 'None'}")
            logger.info(f"  - Cluster site: {cluster.site_id if cluster.site else 'None'}")
            logger.info(f"  - Cluster sector: {cluster.sector_id if cluster.sector else 'None'}")

            # Update progress for each cluster preparation (5-10%)
            progress_pct = 5 + int((idx / total_clusters) * 5)
            self.update_state(
                state='PROGRESS',
                meta={
                    'current': idx + 1,
                    'total': total_clusters,
                    'percentage': progress_pct,
                    'message': f"Preparing cluster '{cluster.name}' ({idx + 1} of {total_clusters})...",
                    'phase': 'preparing',
                    'current_item': cluster.name
                }
            )

        # Log clean request data before sending to AI
        logger.info("=" * 80)
        logger.info("CLEAN REQUEST DATA FOR AI (before sending request):")
        logger.info("=" * 80)
        clean_data = {
            'total_clusters': len(cluster_data),
            'clusters': [
                {
                    'id': c['id'],
                    'name': c['name'],
                    'description': c['description'][:200] if c['description'] else '(empty)',
                    'keywords_count': len(c['keywords']),
                    'keywords': c['keywords'],
                }
                for c in cluster_data
            ]
        }
        logger.info(json.dumps(clean_data, indent=2))
        logger.info("=" * 80)

        # Update progress: Generating ideas with AI (10-80%)
        self.update_state(
            state='PROGRESS',
            meta={
                'current': 0,
                'total': total_clusters,
                'percentage': 10,
                'message': 'Generating content ideas with AI...',
                'phase': 'generating'
            }
        )

        # Create AIProcessor instance with account to load API keys from IntegrationSettings.
        # The first cluster's account is used for the whole batch.
        account = clusters[0].account
        from igny8_core.utils.ai_processor import AIProcessor
        processor = AIProcessor(account=account)

        logger.info(f"Calling AIProcessor.generate_ideas with {len(cluster_data)} clusters, account_id={account.id if account else None}")
        result = processor.generate_ideas(cluster_data, account=account)

        # Log AI response
        logger.info("=" * 80)
        logger.info("AI RESPONSE RECEIVED:")
        logger.info("=" * 80)
        if result.get('error'):
            logger.error(f"AI Error: {result['error']}")
        else:
            ideas = result.get('ideas', [])
            logger.info(f"Total ideas received: {len(ideas)}")
            for idx, idea in enumerate(ideas[:3]):  # Log first 3 ideas
                logger.info(f"Idea {idx + 1}:")
                logger.info(f"  - Title: {idea.get('title', 'N/A')}")
                logger.info(f"  - Content Type: {idea.get('content_type', 'N/A')}")
                logger.info(f"  - Content Structure: {idea.get('content_structure', 'N/A')}")
                logger.info(f"  - Cluster Name: {idea.get('cluster_name', 'N/A')}")
                logger.info(f"  - Cluster ID: {idea.get('cluster_id', 'N/A')}")
                logger.info(f"  - Target Keywords: {idea.get('target_keywords', idea.get('covered_keywords', 'N/A'))}")
                logger.info(f"  - Description type: {type(idea.get('description', '')).__name__}")
                if idx < 2:  # Only show full description for first 2
                    desc = idea.get('description', '')
                    if isinstance(desc, str):
                        logger.info(f"  - Description (first 200 chars): {desc[:200]}...")
                    else:
                        logger.info(f"  - Description (dict): {str(desc)[:200]}...")
        logger.info("=" * 80)

        if result.get('error'):
            logger.error(f"AI ideas generation error: {result['error']}")
            # NOTE(review): the task returns normally right after publishing
            # this FAILURE state - confirm which final state pollers observe.
            self.update_state(
                state='FAILURE',
                meta={
                    'error': result['error'],
                    'message': f"Error: {result['error']}"
                }
            )
            return {'success': False, 'error': result['error']}

        # Update progress: Saving ideas (80-95%)
        ideas_data = result.get('ideas', [])
        total_ideas = len(ideas_data)
        self.update_state(
            state='PROGRESS',
            meta={
                'current': 0,
                'total': total_ideas,
                'percentage': 80,
                'message': f'Saving {total_ideas} generated ideas...',
                'phase': 'saving'
            }
        )

        ideas_created = 0

        # The candidate list is identical for every idea, so log it once
        logger.info(f"Available clusters: {[(c.id, c.name) for c in clusters]}")

        # Create ContentIdeas records. Everything runs in one transaction, so
        # a failure on any idea rolls back the ideas saved before it.
        with transaction.atomic():
            for idx, idea_data in enumerate(ideas_data):
                logger.info(f"Processing idea {idx + 1}/{total_ideas}: {idea_data.get('title', 'Untitled')}")

                cluster = _resolve_idea_cluster(idea_data, idx, clusters)
                if not cluster:
                    logger.warning(f"Cluster not found for idea: {idea_data.get('cluster_name', '') or idea_data.get('cluster_id')}, skipping")
                    continue

                # Ensure site is available (extract from cluster or sector)
                site = cluster.site
                if not site and cluster.sector:
                    site = cluster.sector.site

                account = getattr(cluster, 'account', None)

                logger.info(f"  - Cluster details:")
                logger.info(f"    - ID: {cluster.id}")
                logger.info(f"    - Name: {cluster.name}")
                logger.info(f"    - Account ID: {account.id if account else 'None'}")
                logger.info(f"    - Site ID: {cluster.site_id if cluster.site else 'None'}")
                logger.info(f"    - Site object: {site.id if site else 'None'}")
                logger.info(f"    - Sector ID: {cluster.sector_id if cluster.sector else 'None'}")

                if not site:
                    logger.error(f"Site not found for cluster {cluster.id} (site_id={cluster.site_id}, sector.site_id={cluster.sector.site_id if cluster.sector and cluster.sector.site else 'None'}), cannot create ContentIdeas")
                    continue

                # Update progress for each idea (80-95%)
                progress_pct = 80 + int((idx / total_ideas) * 15)
                self.update_state(
                    state='PROGRESS',
                    meta={
                        'current': idx + 1,
                        'total': total_ideas,
                        'percentage': progress_pct,
                        'message': f"Saving idea '{idea_data.get('title', 'Untitled')}' ({idx + 1} of {total_ideas})...",
                        'phase': 'saving',
                        'current_item': idea_data.get('title', 'Untitled')
                    }
                )

                # Handle description - it might be a dict (structured outline) or string
                description = idea_data.get('description', '')
                if isinstance(description, dict):
                    # Convert structured outline to JSON string
                    description = json.dumps(description)
                    logger.info(f"  - Description converted from dict to JSON (length: {len(description)})")
                elif not isinstance(description, str):
                    description = str(description)
                    logger.info(f"  - Description converted to string (type was {type(idea_data.get('description', '')).__name__})")

                # Handle target_keywords - might be in covered_keywords or target_keywords
                target_keywords = idea_data.get('covered_keywords', '') or idea_data.get('target_keywords', '')

                # Prepare ContentIdeas record data
                idea_record_data = {
                    'idea_title': idea_data.get('title', 'Untitled Idea'),
                    'description': description,
                    'content_type': idea_data.get('content_type', 'blog_post'),
                    'content_structure': idea_data.get('content_structure', 'supporting_page'),
                    'target_keywords': target_keywords,
                    'keyword_cluster': cluster,
                    'estimated_word_count': idea_data.get('estimated_word_count', 1500),
                    'status': 'new',
                    'account': account,  # Use account field
                    'site': site,
                    'sector': cluster.sector,
                }

                logger.info(f"  - Creating ContentIdeas record with:")
                # str() keeps this log line from raising when the AI returns a non-string title
                logger.info(f"    - idea_title: {str(idea_record_data['idea_title'])[:50]}...")
                logger.info(f"    - content_type: {idea_record_data['content_type']}")
                logger.info(f"    - content_structure: {idea_record_data['content_structure']}")
                logger.info(f"    - account_id: {idea_record_data['account'].id if idea_record_data['account'] else 'None'}")
                logger.info(f"    - site_id: {idea_record_data['site'].id if idea_record_data['site'] else 'None'}")
                logger.info(f"    - sector_id: {idea_record_data['sector'].id if idea_record_data['sector'] else 'None'}")
                logger.info(f"    - keyword_cluster_id: {cluster.id}")

                try:
                    # Create ContentIdeas record
                    ContentIdeas.objects.create(**idea_record_data)
                    ideas_created += 1
                    logger.info(f"  - ✓ Successfully created ContentIdeas record")
                except Exception as create_error:
                    logger.error(f"  - ✗ Failed to create ContentIdeas record: {type(create_error).__name__}: {str(create_error)}")
                    logger.error(f"  - Error details: {create_error}", exc_info=True)
                    raise  # Re-raise to see the full traceback

        # Final summary
        final_message = f"Ideas generation complete: {ideas_created} ideas created for {total_clusters} clusters"
        logger.info(final_message)

        return {
            'success': True,
            'ideas_created': ideas_created,
            'message': final_message,
        }

    except Exception as e:
        logger.error(f"Error in auto_generate_ideas_task: {str(e)}", exc_info=True)
        # Publishing the failure state is best effort: a problem while
        # updating the state must not replace the original exception.
        try:
            self.update_state(
                state='FAILURE',
                meta={
                    'error': str(e),
                    'message': f'Error: {str(e)}'
                }
            )
        except Exception as update_error:
            logger.error(f"Failed to update task state: {str(update_error)}")
        raise


def _generate_single_idea_core(cluster_id: int, account_id: int = None, progress_callback=None):
    """
    Core logic for generating a single content idea for a cluster. Can be called with or without Celery.

    Loads the cluster (optionally restricted to ``account_id``), asks
    ``AIProcessor.generate_ideas`` for ideas based on the cluster's name,
    description and keywords, and stores the first returned idea as a
    ContentIdeas row.

    Args:
        cluster_id: Cluster ID to generate idea for
        account_id: Account ID for account isolation
        progress_callback: Optional function to call for progress updates (for Celery tasks).
            It is invoked as ``progress_callback(state=..., meta=...)``.

    Returns:
        dict: ``{'success': True, 'idea_created': 1, 'message': str}`` on
        success, otherwise ``{'success': False, 'error': str}``. Errors are
        returned rather than raised.
    """
    # json is only needed by this function, so it is imported at function scope
    import json

    def report(state, **meta):
        # Progress reporting is optional; without a callback this is a no-op
        if progress_callback:
            progress_callback(state=state, meta=meta)

    try:
        # Initialize progress if callback provided
        report(
            'PROGRESS',
            current=0,
            total=1,
            percentage=0,
            message='Initializing single idea generation...',
            phase='initializing',
        )

        # Get cluster with its relationships. Keywords are fetched separately
        # below as plain strings, so they are not prefetched here.
        clusters_queryset = Clusters.objects.filter(id=cluster_id)
        if account_id:
            clusters_queryset = clusters_queryset.filter(account_id=account_id)

        cluster = clusters_queryset.select_related('sector', 'account', 'site', 'sector__site').first()

        if cluster is None:
            logger.warning(f"Cluster not found: {cluster_id}")
            return {'success': False, 'error': 'Cluster not found'}

        # Update progress: Preparing cluster (0-10%)
        report(
            'PROGRESS',
            current=0,
            total=1,
            percentage=5,
            message=f'Preparing cluster "{cluster.name}"...',
            phase='preparing',
            current_item=cluster.name,
        )

        # Get keywords for this cluster
        keywords = Keywords.objects.filter(cluster=cluster).values_list('keyword', flat=True)

        # Format cluster data for AI
        cluster_data = [{
            'id': cluster.id,
            'name': cluster.name,
            'description': cluster.description or '',
            'keywords': list(keywords),
        }]

        # Update progress: Generating idea with AI (10-80%)
        report(
            'PROGRESS',
            current=0,
            total=1,
            percentage=10,
            message='Generating content idea with AI...',
            phase='generating',
        )

        # Create AIProcessor instance with account to load API keys from IntegrationSettings
        account = getattr(cluster, 'account', None)
        from igny8_core.utils.ai_processor import AIProcessor
        processor = AIProcessor(account=account)
        result = processor.generate_ideas(cluster_data, account=account)

        if result.get('error'):
            logger.error(f"AI idea generation error: {result['error']}")
            return {'success': False, 'error': result['error']}

        ideas_data = result.get('ideas', [])
        if not ideas_data:
            logger.warning(f"No ideas generated for cluster: {cluster.name}")
            return {'success': False, 'error': 'No ideas generated by AI'}

        # Take the first idea (since we're generating for a single cluster)
        idea_data = ideas_data[0]

        # Update progress: Saving idea (80-95%)
        report(
            'PROGRESS',
            current=1,
            total=1,
            percentage=80,
            message=f"Saving idea '{idea_data.get('title', 'Untitled')}'...",
            phase='saving',
            current_item=idea_data.get('title', 'Untitled'),
        )

        # Handle description - it might be a dict (structured outline) or string
        description = idea_data.get('description', '')
        if isinstance(description, dict):
            # Convert structured outline to JSON string
            description = json.dumps(description)
        elif not isinstance(description, str):
            description = str(description)

        # Handle target_keywords - might be in covered_keywords or target_keywords
        target_keywords = idea_data.get('covered_keywords', '') or idea_data.get('target_keywords', '')

        # Same fallback as auto_generate_ideas_task: when the cluster has no
        # site of its own, use the site of its sector.
        site = cluster.site
        if not site and cluster.sector:
            site = cluster.sector.site

        # Create ContentIdeas record
        with transaction.atomic():
            ContentIdeas.objects.create(
                idea_title=idea_data.get('title', 'Untitled Idea'),
                description=description,
                content_type=idea_data.get('content_type', 'blog_post'),
                content_structure=idea_data.get('content_structure', 'supporting_page'),
                target_keywords=target_keywords,
                keyword_cluster=cluster,
                estimated_word_count=idea_data.get('estimated_word_count', 1500),
                status='new',
                account=account,  # Use account field
                site=site,
                sector=cluster.sector,
            )

        # Final progress update
        final_message = f"Idea generation complete: '{idea_data.get('title', 'Untitled Idea')}' created"
        logger.info(final_message)

        report(
            'SUCCESS',
            current=1,
            total=1,
            percentage=100,
            message=final_message,
            phase='completed',
        )

        return {
            'success': True,
            'idea_created': 1,
            'message': final_message,
        }

    except Exception as e:
        logger.error(f"Error in _generate_single_idea_core: {str(e)}", exc_info=True)
        # Reporting the failure is best effort: this function returns error
        # dicts instead of raising, so a failing callback must not escape.
        try:
            report('FAILURE', error=str(e), message=f'Error: {str(e)}')
        except Exception as callback_error:
            logger.error(f"Failed to report failure state: {str(callback_error)}")
        return {'success': False, 'error': str(e)}


@shared_task(bind=True, max_retries=3)
def generate_single_idea_task(self, cluster_id: int, account_id: int = None):
    """
    Celery entry point that generates one AI content idea for a cluster.

    Delegates the work to _generate_single_idea_core and forwards the
    progress updates it emits to this task's state.

    Args:
        cluster_id: Cluster ID
        account_id: Account ID for account isolation

    Returns:
        The result dict produced by _generate_single_idea_core.
    """
    def _relay_progress(state, meta):
        # The core reports progress with state=/meta= keywords; pass them on to the task state
        self.update_state(state=state, meta=meta)

    outcome = _generate_single_idea_core(
        cluster_id,
        account_id,
        progress_callback=_relay_progress,
    )
    return outcome