#!/usr/bin/env python
"""
Test script to verify if AI model can actually use 8192+ max_tokens.

Runs the same long-form content prompt against the OpenAI chat-completions
API at increasing max_tokens limits (4096, 8192, 16384) and reports, for
each run, how many completion tokens were generated and whether the model
stopped naturally ('stop') or was truncated by the limit ('length').
"""
import os
import sys

import django

# Setup Django — must happen before importing any project models below.
sys.path.insert(0, '/app')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')
django.setup()

import requests
import json
from igny8_core.modules.system.models import IntegrationSettings
from igny8_core.auth.models import Account

# Get API configuration from the stored OpenAI integration for this account.
# .first() returns None when no row matches, so guard each lookup explicitly
# rather than crashing with AttributeError further down.
account = Account.objects.filter(slug='aws-admin').first()
if account is None:
    sys.exit("No account found with slug 'aws-admin'")

settings = IntegrationSettings.objects.filter(
    integration_type='openai',
    account=account,
    is_active=True
).first()
if settings is None:
    sys.exit("No active OpenAI integration settings found for account")

config = settings.config or {}
api_key = config.get('apiKey')
model = config.get('model', 'gpt-4o')
if not api_key:
    sys.exit("OpenAI integration settings have no 'apiKey' configured")

print(f"🧪 Testing AI Model: {model}")
print("=" * 70)
print()

# Test with a prompt requiring long response
test_prompt = """Write a comprehensive 1500-word article about "The Benefits of Organic Cotton Bedding" with these 7 sections:

1. Introduction (200 words)
2. Health and Skin Benefits (250 words)
3. Environmental Sustainability (250 words)
4. Quality and Durability (250 words)
5. Cost Analysis (200 words)
6. Buying Guide (250 words)
7. Conclusion (100 words)

Include specific examples, data points, and detailed comparisons in each section."""

tests = [
    {"name": "Test with max_tokens=4096", "max_tokens": 4096},
    {"name": "Test with max_tokens=8192", "max_tokens": 8192},
    {"name": "Test with max_tokens=16384", "max_tokens": 16384},
]

results = []

for test in tests:
    print(f"\n{test['name']}")
    print("-" * 70)

    try:
        response = requests.post(
            'https://api.openai.com/v1/chat/completions',
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json',
            },
            json={
                'model': model,
                'messages': [{'role': 'user', 'content': test_prompt}],
                'max_tokens': test['max_tokens'],
                'temperature': 0.7,
            },
            timeout=120
        )

        if response.status_code == 200:
            data = response.json()
            usage = data.get('usage', {})
            finish_reason = data['choices'][0].get('finish_reason', 'unknown')

            result = {
                'max_tokens': test['max_tokens'],
                'prompt_tokens': usage.get('prompt_tokens', 0),
                'completion_tokens': usage.get('completion_tokens', 0),
                'total_tokens': usage.get('total_tokens', 0),
                'finish_reason': finish_reason,
                'status': 'success'
            }
            results.append(result)

            print("✓ Response received")
            print(f" Prompt Tokens: {result['prompt_tokens']}")
            print(f" Completion Tokens: {result['completion_tokens']}")
            print(f" Total Tokens: {result['total_tokens']}")
            print(f" Finish Reason: {finish_reason}")

            # 'length' means the model hit the cap mid-response; 'stop' means
            # it completed on its own before reaching max_tokens.
            if finish_reason == 'length':
                print(f" ⚠️ TRUNCATED: Hit the max_tokens={test['max_tokens']} limit!")
            elif finish_reason == 'stop':
                print(" ✅ COMPLETE: Response finished naturally")
        else:
            print(f"✗ API Error: {response.status_code}")
            # The error body may not be JSON (e.g. an HTML page from a proxy
            # or gateway), so don't let .json() raise here.
            try:
                error_data = response.json()
                error_message = error_data.get('error', {}).get('message', 'Unknown error')
            except ValueError:
                error_message = response.text[:200] or 'Unknown error'
            print(f" Error: {error_message}")
            results.append({
                'max_tokens': test['max_tokens'],
                'status': 'error',
                'error': error_message
            })

    except Exception as e:
        # Network failures, timeouts, unexpected payload shapes: record and
        # continue so the remaining max_tokens levels are still exercised.
        print(f"✗ Exception: {str(e)}")
        results.append({
            'max_tokens': test['max_tokens'],
            'status': 'exception',
            'error': str(e)
        })

# Print summary
print("\n" + "=" * 70)
print("📊 SUMMARY")
print("=" * 70)

success_results = [r for r in results if r.get('status') == 'success']

if len(success_results) >= 2:
    for result in success_results:
        status_icon = "⚠️ " if result['finish_reason'] == 'length' else "✅"
        print(f"{status_icon} max_tokens={result['max_tokens']:5d}: {result['completion_tokens']:5d} tokens generated ({result['finish_reason']})")

    print()
    # Compare results: if the lowest limit truncated but the highest limit
    # finished naturally, the larger max_tokens demonstrably helped.
    if success_results[0]['finish_reason'] == 'length' and success_results[-1]['finish_reason'] == 'stop':
        print("✅ VERIFIED: Higher max_tokens allows longer, complete responses!")
        print(f" Model can generate {success_results[-1]['completion_tokens']} tokens without truncation")
    elif success_results[-1]['completion_tokens'] > success_results[0]['max_tokens']:
        print(f"✅ VERIFIED: Model generated {success_results[-1]['completion_tokens']} tokens")
        print(f" This exceeds the {success_results[0]['max_tokens']} limit from first test")
    else:
        print("ℹ️ All tests completed but responses were similar in length")
        print(" The prompt may not require more than ~4000 tokens")

    print()
    print("💡 RECOMMENDATION:")
    if any(r.get('finish_reason') == 'length' and r.get('max_tokens', 0) <= 8192 for r in success_results):
        print(" Content generation needs max_tokens > 8192 for long articles (1200+ words)")
        print(" Suggest: Set max_tokens to 16384 for content_generation function")
    else:
        print(" Current max_tokens=8192 appears sufficient for most content")