#!/usr/bin/env python
"""
Test script to verify if AI model can actually use 8192+ max_tokens.

Runs the same long-form content prompt against the OpenAI chat-completions
API at increasing max_tokens limits (4096, 8192, 16384) and reports, for
each run, how many completion tokens were generated and whether the model
stopped naturally ('stop') or was truncated by the limit ('length').
"""
import os
import sys

import django

# Setup Django — must happen before importing any project models below.
sys.path.insert(0, '/app')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')
django.setup()

import requests
import json
from igny8_core.modules.system.models import IntegrationSettings
from igny8_core.auth.models import Account

# Get API configuration from the stored OpenAI integration for this account.
# .first() returns None when no row matches, so guard each lookup explicitly
# rather than crashing with AttributeError further down.
account = Account.objects.filter(slug='aws-admin').first()
if account is None:
    sys.exit("No account found with slug 'aws-admin'")

settings = IntegrationSettings.objects.filter(
    integration_type='openai',
    account=account,
    is_active=True
).first()
if settings is None:
    sys.exit("No active OpenAI integration settings found for account")

config = settings.config or {}
api_key = config.get('apiKey')
model = config.get('model', 'gpt-4o')
if not api_key:
    sys.exit("OpenAI integration settings have no 'apiKey' configured")

print(f"🧪 Testing AI Model: {model}")
print("=" * 70)
print()

# Test with a prompt requiring long response
test_prompt = """Write a comprehensive 1500-word article about "The Benefits of Organic Cotton Bedding" with these 7 sections:

1. Introduction (200 words)
2. Health and Skin Benefits (250 words)
3. Environmental Sustainability (250 words)
4. Quality and Durability (250 words)
5. Cost Analysis (200 words)
6. Buying Guide (250 words)
7. Conclusion (100 words)

Include specific examples, data points, and detailed comparisons in each section."""

tests = [
    {"name": "Test with max_tokens=4096", "max_tokens": 4096},
    {"name": "Test with max_tokens=8192", "max_tokens": 8192},
    {"name": "Test with max_tokens=16384", "max_tokens": 16384},
]

results = []

for test in tests:
    print(f"\n{test['name']}")
    print("-" * 70)

    try:
        response = requests.post(
            'https://api.openai.com/v1/chat/completions',
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json',
            },
            json={
                'model': model,
                'messages': [{'role': 'user', 'content': test_prompt}],
                'max_tokens': test['max_tokens'],
                'temperature': 0.7,
            },
            timeout=120
        )

        if response.status_code == 200:
            data = response.json()
            usage = data.get('usage', {})
            finish_reason = data['choices'][0].get('finish_reason', 'unknown')

            result = {
                'max_tokens': test['max_tokens'],
                'prompt_tokens': usage.get('prompt_tokens', 0),
                'completion_tokens': usage.get('completion_tokens', 0),
                'total_tokens': usage.get('total_tokens', 0),
                'finish_reason': finish_reason,
                'status': 'success'
            }
            results.append(result)

            print("✓ Response received")
            print(f" Prompt Tokens: {result['prompt_tokens']}")
            print(f" Completion Tokens: {result['completion_tokens']}")
            print(f" Total Tokens: {result['total_tokens']}")
            print(f" Finish Reason: {finish_reason}")

            # 'length' means the model hit the cap mid-response; 'stop' means
            # it completed on its own before reaching max_tokens.
            if finish_reason == 'length':
                print(f" ⚠️ TRUNCATED: Hit the max_tokens={test['max_tokens']} limit!")
            elif finish_reason == 'stop':
                print(" ✅ COMPLETE: Response finished naturally")
        else:
            print(f"✗ API Error: {response.status_code}")
            # The error body may not be JSON (e.g. an HTML page from a proxy
            # or gateway), so don't let .json() raise here.
            try:
                error_data = response.json()
                error_message = error_data.get('error', {}).get('message', 'Unknown error')
            except ValueError:
                error_message = response.text[:200] or 'Unknown error'
            print(f" Error: {error_message}")
            results.append({
                'max_tokens': test['max_tokens'],
                'status': 'error',
                'error': error_message
            })

    except Exception as e:
        # Network failures, timeouts, unexpected payload shapes: record and
        # continue so the remaining max_tokens levels are still exercised.
        print(f"✗ Exception: {str(e)}")
        results.append({
            'max_tokens': test['max_tokens'],
            'status': 'exception',
            'error': str(e)
        })

# Print summary
print("\n" + "=" * 70)
print("📊 SUMMARY")
print("=" * 70)

success_results = [r for r in results if r.get('status') == 'success']

if len(success_results) >= 2:
    for result in success_results:
        status_icon = "⚠️ " if result['finish_reason'] == 'length' else "✅"
        print(f"{status_icon} max_tokens={result['max_tokens']:5d}: {result['completion_tokens']:5d} tokens generated ({result['finish_reason']})")

    print()
    # Compare results: if the lowest limit truncated but the highest limit
    # finished naturally, the larger max_tokens demonstrably helped.
    if success_results[0]['finish_reason'] == 'length' and success_results[-1]['finish_reason'] == 'stop':
        print("✅ VERIFIED: Higher max_tokens allows longer, complete responses!")
        print(f" Model can generate {success_results[-1]['completion_tokens']} tokens without truncation")
    elif success_results[-1]['completion_tokens'] > success_results[0]['max_tokens']:
        print(f"✅ VERIFIED: Model generated {success_results[-1]['completion_tokens']} tokens")
        print(f" This exceeds the {success_results[0]['max_tokens']} limit from first test")
    else:
        print("ℹ️ All tests completed but responses were similar in length")
        print(" The prompt may not require more than ~4000 tokens")

    print()
    print("💡 RECOMMENDATION:")
    if any(r.get('finish_reason') == 'length' and r.get('max_tokens', 0) <= 8192 for r in success_results):
        print(" Content generation needs max_tokens > 8192 for long articles (1200+ words)")
        print(" Suggest: Set max_tokens to 16384 for content_generation function")
    else:
        print(" Current max_tokens=8192 appears sufficient for most content")