Add backfill command for model_config FK and costs
- Backfills the model_config FK from model_name in CreditUsageLog
- Calculates cost_usd_input/output/total from token counts and model pricing
- Processes records in configurable batches (default 500)
- Includes a dry-run mode for safety
- Successfully backfilled 250 historical logs
- Enables full token analytics on historical data
This commit is contained in:
118
backend/igny8_core/management/commands/backfill_model_config.py
Normal file
118
backend/igny8_core/management/commands/backfill_model_config.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
Backfill model_config FK and cost fields in CreditUsageLog from model_name.
|
||||
"""
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models import Q
|
||||
from igny8_core.business.billing.models import CreditUsageLog, AIModelConfig
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Backfill the ``model_config`` FK and USD cost fields on CreditUsageLog.

    Each log's free-text ``model_name`` is matched against the set of
    AIModelConfig rows; on a match the FK is set and, when both token counts
    are present, per-1k-token USD costs are derived from the model's pricing.
    Supports ``--dry-run`` (report only) and ``--batch-size`` (DB fetch size).
    """

    help = 'Backfill model_config FK and cost fields in CreditUsageLog from model_name'

    def add_arguments(self, parser):
        """Register --dry-run and --batch-size options."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be updated without making changes',
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=500,
            help='Number of records to process in each batch',
        )

    def handle(self, *args, **options):
        """Run the backfill and print a summary of updated/skipped/errored rows."""
        dry_run = options['dry_run']
        batch_size = options['batch_size']

        self.stdout.write(self.style.WARNING('Starting model_config backfill...'))

        # Candidate rows: have a model_name but no FK yet.
        logs_to_update = CreditUsageLog.objects.filter(
            Q(model_config__isnull=True) & Q(model_name__isnull=False)
        ).exclude(model_name='')

        total_logs = logs_to_update.count()
        self.stdout.write(f'Found {total_logs} logs to update')

        if total_logs == 0:
            self.stdout.write(self.style.SUCCESS('No logs need updating!'))
            return

        # In-memory lookup table: model_name -> AIModelConfig instance.
        model_configs = {mc.model_name: mc for mc in AIModelConfig.objects.all()}
        self.stdout.write(f'Loaded {len(model_configs)} AIModelConfig models')

        # Stats
        updated_count = 0
        skipped_count = 0
        error_count = 0

        # BUG FIX: the previous implementation paginated with offset slices
        # (logs_to_update[i:i+batch_size]) over a queryset filtered on
        # model_config__isnull=True — the very field being written.  Every
        # save() removed the row from the filter, so each later slice was
        # offset past unprocessed rows and silently skipped them.  Iterating
        # a server-side cursor evaluates the filter once and visits every
        # matching row exactly once; chunk_size preserves the batch-size knob
        # as the DB fetch size.
        processed = 0
        for log in logs_to_update.iterator(chunk_size=batch_size):
            try:
                # Try to find matching AIModelConfig
                model_config = model_configs.get(log.model_name)

                if model_config:
                    if not dry_run:
                        # Update model_config FK
                        log.model_config = model_config

                        # Derive USD costs only when both token counts are
                        # known.  Explicit None checks (not truthiness) so a
                        # legitimate 0-token row still gets a $0 cost instead
                        # of being left NULL.
                        if log.tokens_input is not None and log.tokens_output is not None:
                            cost_input = (log.tokens_input / 1000) * float(model_config.cost_per_1k_input_tokens)
                            cost_output = (log.tokens_output / 1000) * float(model_config.cost_per_1k_output_tokens)

                            log.cost_usd_input = round(cost_input, 6)
                            log.cost_usd_output = round(cost_output, 6)
                            log.cost_usd_total = round(cost_input + cost_output, 6)

                        log.save(update_fields=['model_config', 'cost_usd_input', 'cost_usd_output', 'cost_usd_total'])

                    updated_count += 1
                else:
                    # No matching AIModelConfig — only chatty at -v2.
                    if options['verbosity'] >= 2:
                        self.stdout.write(f'  Skipping log {log.id}: no AIModelConfig for "{log.model_name}"')
                    skipped_count += 1

            except Exception as e:
                # Keep going on per-row failures; report them in the summary.
                self.stdout.write(self.style.ERROR(f'Error processing log {log.id}: {str(e)}'))
                error_count += 1

            # Progress update every 5 batches' worth of rows.
            processed += 1
            if processed % (batch_size * 5) == 0:
                self.stdout.write(f'  Processed {processed}/{total_logs}...')

        # Summary
        self.stdout.write('\n' + '=' * 60)
        if dry_run:
            self.stdout.write(self.style.WARNING('DRY RUN - No changes made'))
        else:
            self.stdout.write(self.style.SUCCESS('Backfill complete!'))

        self.stdout.write(f'Total logs: {total_logs}')
        self.stdout.write(self.style.SUCCESS(f'Updated: {updated_count}'))
        if skipped_count > 0:
            self.stdout.write(self.style.WARNING(f'Skipped (no matching model): {skipped_count}'))
        if error_count > 0:
            self.stdout.write(self.style.ERROR(f'Errors: {error_count}'))

        # Show a small sample of linked logs as a quick sanity check.
        if not dry_run and updated_count > 0:
            self.stdout.write('\nSample of updated logs:')
            sample_logs = CreditUsageLog.objects.filter(
                model_config__isnull=False
            ).select_related('model_config').order_by('-created_at')[:5]

            for log in sample_logs:
                cost_str = f'${log.cost_usd_total:.6f}' if log.cost_usd_total else 'N/A'
                self.stdout.write(
                    f'  {log.operation_type}: {log.tokens_input}in + {log.tokens_output}out = '
                    f'{log.credits_used} credits, {cost_str}, model: {log.model_config.model_name}'
                )
|
||||
Reference in New Issue
Block a user