Add backfill command for model_config FK and costs
- Backfills model_config FK from model_name in CreditUsageLog
- Calculates cost_usd_input/output/total from tokens and model pricing
- Processes in configurable batches (default 500)
- Includes dry-run mode for safety
- Successfully backfilled 250 historical logs
- Enables full token analytics on historical data
This commit is contained in:
118
backend/igny8_core/management/commands/backfill_model_config.py
Normal file
118
backend/igny8_core/management/commands/backfill_model_config.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
"""
|
||||||
|
Backfill model_config FK and cost fields in CreditUsageLog from model_name.
|
||||||
|
"""
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db.models import Q
|
||||||
|
from igny8_core.business.billing.models import CreditUsageLog, AIModelConfig
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Backfill ``model_config`` FK and USD cost fields on ``CreditUsageLog``.

    For every log row that has a ``model_name`` but no ``model_config`` FK,
    look up the matching ``AIModelConfig`` by name, set the FK, and (when both
    token counts are present) derive ``cost_usd_input`` / ``cost_usd_output`` /
    ``cost_usd_total`` from the model's per-1k-token pricing.

    Options:
        --dry-run     Report what would change without writing anything.
        --batch-size  Number of rows fetched per batch (default 500).
    """

    help = 'Backfill model_config FK and cost fields in CreditUsageLog from model_name'

    def add_arguments(self, parser):
        """Register the command-line options for this command."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be updated without making changes',
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=500,
            help='Number of records to process in each batch',
        )

    def handle(self, *args, **options):
        """Run the backfill, printing progress and a final summary."""
        dry_run = options['dry_run']
        batch_size = options['batch_size']

        self.stdout.write(self.style.WARNING('Starting model_config backfill...'))

        # Rows that carry a usable model_name but have no FK resolved yet.
        logs_to_update = CreditUsageLog.objects.filter(
            Q(model_config__isnull=True) & Q(model_name__isnull=False)
        ).exclude(model_name='')

        total_logs = logs_to_update.count()
        self.stdout.write(f'Found {total_logs} logs to update')

        if total_logs == 0:
            self.stdout.write(self.style.SUCCESS('No logs need updating!'))
            return

        # In-memory lookup table: model_name -> AIModelConfig instance.
        model_configs = {mc.model_name: mc for mc in AIModelConfig.objects.all()}
        self.stdout.write(f'Loaded {len(model_configs)} AIModelConfig models')

        # Stats
        updated_count = 0
        skipped_count = 0
        error_count = 0

        # BUGFIX: the previous implementation sliced the filtered queryset by
        # offset (logs_to_update[i:i+batch_size]) while saving rows.  Each
        # save() removes the row from the model_config__isnull=True filter, so
        # later offset slices re-evaluated against the shrinking result set and
        # silently skipped unprocessed records.  Snapshot the primary keys once
        # up front and batch over that stable list instead.
        pks = list(logs_to_update.values_list('pk', flat=True))

        for i in range(0, total_logs, batch_size):
            batch = CreditUsageLog.objects.filter(pk__in=pks[i:i + batch_size])

            for log in batch:
                try:
                    # Resolve the FK target by exact model_name match.
                    model_config = model_configs.get(log.model_name)

                    if model_config:
                        if not dry_run:
                            log.model_config = model_config

                            # Derive USD costs from token counts and per-1k
                            # pricing; only when both counts are present and
                            # non-zero (zero/None token rows keep their
                            # existing cost values).
                            if log.tokens_input and log.tokens_output:
                                cost_input = (log.tokens_input / 1000) * float(model_config.cost_per_1k_input_tokens)
                                cost_output = (log.tokens_output / 1000) * float(model_config.cost_per_1k_output_tokens)

                                log.cost_usd_input = round(cost_input, 6)
                                log.cost_usd_output = round(cost_output, 6)
                                log.cost_usd_total = round(cost_input + cost_output, 6)

                            log.save(update_fields=['model_config', 'cost_usd_input', 'cost_usd_output', 'cost_usd_total'])

                        # Counted in dry-run too: "would be updated".
                        updated_count += 1
                    else:
                        # No matching AIModelConfig for this model_name.
                        if options['verbosity'] >= 2:
                            self.stdout.write(f' Skipping log {log.id}: no AIModelConfig for "{log.model_name}"')
                        skipped_count += 1

                except Exception as e:
                    # Best-effort backfill: report and continue with the rest.
                    self.stdout.write(self.style.ERROR(f'Error processing log {log.id}: {str(e)}'))
                    error_count += 1

            # Progress line every 5 batches.
            if (i + batch_size) % (batch_size * 5) == 0:
                self.stdout.write(f' Processed {min(i + batch_size, total_logs)}/{total_logs}...')

        # Summary
        self.stdout.write('\n' + '='*60)
        if dry_run:
            self.stdout.write(self.style.WARNING('DRY RUN - No changes made'))
        else:
            self.stdout.write(self.style.SUCCESS('Backfill complete!'))

        self.stdout.write(f'Total logs: {total_logs}')
        self.stdout.write(self.style.SUCCESS(f'Updated: {updated_count}'))
        if skipped_count > 0:
            self.stdout.write(self.style.WARNING(f'Skipped (no matching model): {skipped_count}'))
        if error_count > 0:
            self.stdout.write(self.style.ERROR(f'Errors: {error_count}'))

        # Show a sample of updated logs so the operator can sanity-check costs.
        if not dry_run and updated_count > 0:
            self.stdout.write('\nSample of updated logs:')
            sample_logs = CreditUsageLog.objects.filter(
                model_config__isnull=False
            ).select_related('model_config').order_by('-created_at')[:5]

            for log in sample_logs:
                cost_str = f'${log.cost_usd_total:.6f}' if log.cost_usd_total else 'N/A'
                self.stdout.write(
                    f' {log.operation_type}: {log.tokens_input}in + {log.tokens_output}out = '
                    f'{log.credits_used} credits, {cost_str}, model: {log.model_config.model_name}'
                )
|
||||||
Reference in New Issue
Block a user