AI functions and other automation fixes

This commit is contained in:
IGNY8 VPS (Salman)
2026-01-14 23:08:48 +00:00
parent cb2d109593
commit 6bb3dd3df4
11 changed files with 1289 additions and 197 deletions

View File

@@ -172,6 +172,22 @@ class AutomationService:
total_count = pending_keywords.count()
# IMPORTANT: Group keywords by sector to avoid mixing sectors in clustering
# Each sector's keywords must be processed separately
from collections import defaultdict
keywords_by_sector = defaultdict(list)
for kw_id, sector_id in pending_keywords.values_list('id', 'sector_id'):
# Use sector_id or 'no_sector' for keywords without a sector
key = sector_id if sector_id else 'no_sector'
keywords_by_sector[key].append(kw_id)
sector_count = len(keywords_by_sector)
if sector_count > 1:
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Keywords span {sector_count} sectors - will process each sector separately"
)
# NEW: Pre-stage validation for minimum keywords
from igny8_core.ai.validators.cluster_validators import validate_minimum_keywords
@@ -229,20 +245,19 @@ class AutomationService:
# Process in batches with dynamic sizing
batch_size = self.config.stage_1_batch_size
# FIXED: Use min() for dynamic batch sizing
actual_batch_size = min(total_count, batch_size)
keywords_processed = 0
clusters_created = 0
batches_run = 0
credits_before = self._get_credits_used()
keyword_ids = list(pending_keywords.values_list('id', flat=True))
# Get total keyword count for progress tracking
total_keyword_count = sum(len(ids) for ids in keywords_by_sector.values())
# INITIAL SAVE: Set keywords_total immediately so frontend shows accurate counts from start
self.run.stage_1_result = {
'keywords_processed': 0,
'keywords_total': len(keyword_ids),
'keywords_total': total_keyword_count,
'clusters_created': 0,
'batches_run': 0,
'credits_used': 0,
@@ -251,17 +266,28 @@ class AutomationService:
}
self.run.save(update_fields=['stage_1_result'])
for i in range(0, len(keyword_ids), actual_batch_size):
# Check if automation should stop (paused or cancelled)
should_stop, reason = self._check_should_stop()
if should_stop:
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Stage {reason} - saving progress ({keywords_processed} keywords processed)"
)
# Save current progress
credits_used = self._get_credits_used() - credits_before
time_elapsed = self._format_time_elapsed(start_time)
# Process each sector's keywords separately to avoid mixing sectors
for sector_idx, (sector_key, sector_keyword_ids) in enumerate(keywords_by_sector.items()):
sector_name = f"Sector {sector_key}" if sector_key != 'no_sector' else "No Sector"
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Processing {sector_name} ({len(sector_keyword_ids)} keywords) [{sector_idx + 1}/{len(keywords_by_sector)}]"
)
# Dynamic batch sizing per sector
actual_batch_size = min(len(sector_keyword_ids), batch_size)
for i in range(0, len(sector_keyword_ids), actual_batch_size):
# Check if automation should stop (paused or cancelled)
should_stop, reason = self._check_should_stop()
if should_stop:
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Stage {reason} - saving progress ({keywords_processed} keywords processed)"
)
# Save current progress
credits_used = self._get_credits_used() - credits_before
time_elapsed = self._format_time_elapsed(start_time)
self.run.stage_1_result = {
'keywords_processed': keywords_processed,
'clusters_created': clusters_created,
@@ -275,92 +301,92 @@ class AutomationService:
self.run.save()
return
try:
batch = keyword_ids[i:i + actual_batch_size]
batch_num = (i // actual_batch_size) + 1
total_batches = (len(keyword_ids) + actual_batch_size - 1) // actual_batch_size
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Processing batch {batch_num}/{total_batches} ({len(batch)} keywords)"
)
# Call AI function via AIEngine (runs synchronously - no Celery subtask)
engine = AIEngine(account=self.account)
result = engine.execute(
fn=AutoClusterFunction(),
payload={'ids': batch}
)
# NOTE: AIEngine.execute() runs synchronously and returns immediately
# No Celery task polling needed
if not result.get('success'):
error_msg = result.get('error', 'Unknown error')
logger.warning(f"[AutomationService] Clustering failed for batch {batch_num}: {error_msg}")
# Continue to next batch
keywords_processed += len(batch)
batches_run += 1
# Log progress
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Batch {batch_num} complete"
)
# INCREMENTAL SAVE: Update stage result after each batch for real-time UI progress
clusters_so_far = Clusters.objects.filter(
site=self.site,
created_at__gte=self.run.started_at
).count()
self.run.stage_1_result = {
'keywords_processed': keywords_processed,
'keywords_total': len(keyword_ids),
'clusters_created': clusters_so_far,
'batches_run': batches_run,
'credits_used': self._get_credits_used() - credits_before,
'time_elapsed': self._format_time_elapsed(start_time),
'in_progress': True
}
self.run.save(update_fields=['stage_1_result'])
# Emit per-item trace event for UI progress tracking
try:
self.logger.append_trace(self.account.id, self.site.id, self.run.run_id, {
'event': 'stage_item_processed',
'run_id': self.run.run_id,
'stage': stage_number,
'processed': keywords_processed,
'total': len(keyword_ids),
'batch_num': batch_num,
'timestamp': datetime.now().isoformat()
})
except Exception:
pass
except Exception as e:
# FIXED: Log error but continue processing remaining batches
error_msg = f"Failed to process batch {batch_num}: {str(e)}"
logger.error(f"[AutomationService] {error_msg}", exc_info=True)
self.logger.log_stage_error(
self.run.run_id, self.account.id, self.site.id,
stage_number, error_msg
)
# Continue to next batch
continue
# ADDED: Within-stage delay (between batches)
if i + actual_batch_size < len(keyword_ids): # Not the last batch
delay = self.config.within_stage_delay
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Waiting {delay} seconds before next batch..."
)
time.sleep(delay)
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, "Delay complete, resuming processing"
)
batch = sector_keyword_ids[i:i + actual_batch_size]
batch_num = (i // actual_batch_size) + 1
total_batches = (len(sector_keyword_ids) + actual_batch_size - 1) // actual_batch_size
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Processing {sector_name} batch {batch_num}/{total_batches} ({len(batch)} keywords)"
)
# Call AI function via AIEngine (runs synchronously - no Celery subtask)
engine = AIEngine(account=self.account)
result = engine.execute(
fn=AutoClusterFunction(),
payload={'ids': batch}
)
# NOTE: AIEngine.execute() runs synchronously and returns immediately
# No Celery task polling needed
if not result.get('success'):
error_msg = result.get('error', 'Unknown error')
logger.warning(f"[AutomationService] Clustering failed for {sector_name} batch {batch_num}: {error_msg}")
# Continue to next batch
keywords_processed += len(batch)
batches_run += 1
# Log progress
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"{sector_name} batch {batch_num} complete"
)
# INCREMENTAL SAVE: Update stage result after each batch for real-time UI progress
clusters_so_far = Clusters.objects.filter(
site=self.site,
created_at__gte=self.run.started_at
).count()
self.run.stage_1_result = {
'keywords_processed': keywords_processed,
'keywords_total': total_keyword_count,
'clusters_created': clusters_so_far,
'batches_run': batches_run,
'credits_used': self._get_credits_used() - credits_before,
'time_elapsed': self._format_time_elapsed(start_time),
'in_progress': True
}
self.run.save(update_fields=['stage_1_result'])
# Emit per-item trace event for UI progress tracking
try:
self.logger.append_trace(self.account.id, self.site.id, self.run.run_id, {
'event': 'stage_item_processed',
'run_id': self.run.run_id,
'stage': stage_number,
'processed': keywords_processed,
'total': total_keyword_count,
'batch_num': batch_num,
'timestamp': datetime.now().isoformat()
})
except Exception:
pass
except Exception as e:
# FIXED: Log error but continue processing remaining batches
error_msg = f"Failed to process {sector_name} batch {batch_num}: {str(e)}"
logger.error(f"[AutomationService] {error_msg}", exc_info=True)
self.logger.log_stage_error(
self.run.run_id, self.account.id, self.site.id,
stage_number, error_msg
)
# Continue to next batch
continue
# ADDED: Within-stage delay (between batches)
if i + actual_batch_size < len(sector_keyword_ids): # Not the last batch in this sector
delay = self.config.within_stage_delay
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, f"Waiting {delay} seconds before next batch..."
)
time.sleep(delay)
self.logger.log_stage_progress(
self.run.run_id, self.account.id, self.site.id,
stage_number, "Delay complete, resuming processing"
)
# Get clusters created count
clusters_created = Clusters.objects.filter(