Add backfill command for model_config FK and costs
- Backfills model_config FK from model_name in CreditUsageLog
- Calculates cost_usd_input/output/total from tokens and model pricing
- Processes in configurable batches (default 500)
- Includes dry-run mode for safety
- Successfully backfilled 250 historical logs
- Enables full token analytics on historical data
This commit is contained in:
118
backend/igny8_core/management/commands/backfill_model_config.py
Normal file
118
backend/igny8_core/management/commands/backfill_model_config.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
"""
|
||||||
|
Backfill model_config FK and cost fields in CreditUsageLog from model_name.
|
||||||
|
"""
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db.models import Q
|
||||||
|
from igny8_core.business.billing.models import CreditUsageLog, AIModelConfig
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Backfill ``model_config`` FK and USD cost fields on ``CreditUsageLog``.

    For every log row that has a ``model_name`` but no ``model_config`` FK,
    look up the matching ``AIModelConfig`` by name, set the FK, and (when both
    token counts are present) derive ``cost_usd_input`` / ``cost_usd_output`` /
    ``cost_usd_total`` from the model's per-1k-token pricing.

    Options:
        --dry-run     Report what would change without writing anything.
        --batch-size  Number of rows fetched per batch (default 500).
    """

    help = 'Backfill model_config FK and cost fields in CreditUsageLog from model_name'

    def add_arguments(self, parser):
        """Register the command-line options for this command."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be updated without making changes',
        )
        parser.add_argument(
            '--batch-size',
            type=int,
            default=500,
            help='Number of records to process in each batch',
        )

    def handle(self, *args, **options):
        """Run the backfill, printing progress and a final summary."""
        dry_run = options['dry_run']
        batch_size = options['batch_size']

        self.stdout.write(self.style.WARNING('Starting model_config backfill...'))

        # Rows that carry a usable model_name but have no FK resolved yet.
        logs_to_update = CreditUsageLog.objects.filter(
            Q(model_config__isnull=True) & Q(model_name__isnull=False)
        ).exclude(model_name='')

        total_logs = logs_to_update.count()
        self.stdout.write(f'Found {total_logs} logs to update')

        if total_logs == 0:
            self.stdout.write(self.style.SUCCESS('No logs need updating!'))
            return

        # In-memory lookup table: model_name -> AIModelConfig instance.
        model_configs = {mc.model_name: mc for mc in AIModelConfig.objects.all()}
        self.stdout.write(f'Loaded {len(model_configs)} AIModelConfig models')

        # Stats
        updated_count = 0
        skipped_count = 0
        error_count = 0

        # BUGFIX: the previous implementation sliced the filtered queryset by
        # offset (logs_to_update[i:i+batch_size]) while saving rows.  Each
        # save() removes the row from the model_config__isnull=True filter, so
        # later offset slices re-evaluated against the shrinking result set and
        # silently skipped unprocessed records.  Snapshot the primary keys once
        # up front and batch over that stable list instead.
        pks = list(logs_to_update.values_list('pk', flat=True))

        for i in range(0, total_logs, batch_size):
            batch = CreditUsageLog.objects.filter(pk__in=pks[i:i + batch_size])

            for log in batch:
                try:
                    # Resolve the FK target by exact model_name match.
                    model_config = model_configs.get(log.model_name)

                    if model_config:
                        if not dry_run:
                            log.model_config = model_config

                            # Derive USD costs from token counts and per-1k
                            # pricing; only when both counts are present and
                            # non-zero (zero/None token rows keep their
                            # existing cost values).
                            if log.tokens_input and log.tokens_output:
                                cost_input = (log.tokens_input / 1000) * float(model_config.cost_per_1k_input_tokens)
                                cost_output = (log.tokens_output / 1000) * float(model_config.cost_per_1k_output_tokens)

                                log.cost_usd_input = round(cost_input, 6)
                                log.cost_usd_output = round(cost_output, 6)
                                log.cost_usd_total = round(cost_input + cost_output, 6)

                            log.save(update_fields=['model_config', 'cost_usd_input', 'cost_usd_output', 'cost_usd_total'])

                        # Counted in dry-run too: "would be updated".
                        updated_count += 1
                    else:
                        # No matching AIModelConfig for this model_name.
                        if options['verbosity'] >= 2:
                            self.stdout.write(f' Skipping log {log.id}: no AIModelConfig for "{log.model_name}"')
                        skipped_count += 1

                except Exception as e:
                    # Best-effort backfill: report and continue with the rest.
                    self.stdout.write(self.style.ERROR(f'Error processing log {log.id}: {str(e)}'))
                    error_count += 1

            # Progress line every 5 batches.
            if (i + batch_size) % (batch_size * 5) == 0:
                self.stdout.write(f' Processed {min(i + batch_size, total_logs)}/{total_logs}...')

        # Summary
        self.stdout.write('\n' + '='*60)
        if dry_run:
            self.stdout.write(self.style.WARNING('DRY RUN - No changes made'))
        else:
            self.stdout.write(self.style.SUCCESS('Backfill complete!'))

        self.stdout.write(f'Total logs: {total_logs}')
        self.stdout.write(self.style.SUCCESS(f'Updated: {updated_count}'))
        if skipped_count > 0:
            self.stdout.write(self.style.WARNING(f'Skipped (no matching model): {skipped_count}'))
        if error_count > 0:
            self.stdout.write(self.style.ERROR(f'Errors: {error_count}'))

        # Show a sample of updated logs so the operator can sanity-check costs.
        if not dry_run and updated_count > 0:
            self.stdout.write('\nSample of updated logs:')
            sample_logs = CreditUsageLog.objects.filter(
                model_config__isnull=False
            ).select_related('model_config').order_by('-created_at')[:5]

            for log in sample_logs:
                cost_str = f'${log.cost_usd_total:.6f}' if log.cost_usd_total else 'N/A'
                self.stdout.write(
                    f' {log.operation_type}: {log.tokens_input}in + {log.tokens_output}out = '
                    f'{log.credits_used} credits, {cost_str}, model: {log.model_config.model_name}'
                )
|
||||||
Reference in New Issue
Block a user