#!/usr/bin/env python
"""
Test script to verify if AI model can actually use 8192+ max_tokens
"""
import os
import sys

import django

# Setup Django before importing any project models.
sys.path.insert(0, '/app')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')
django.setup()

import requests
import json

from igny8_core.modules.system.models import IntegrationSettings
from igny8_core.auth.models import Account

# --- Load API configuration from the database --------------------------------
# NOTE: renamed from `settings` to `integration` to avoid shadowing the
# Django settings naming convention.
account = Account.objects.filter(slug='aws-admin').first()
integration = IntegrationSettings.objects.filter(
    integration_type='openai',
    account=account,
    is_active=True
).first()

# Guard: .first() returns None when no row matches; without this check the
# next line would crash with AttributeError instead of a clear message.
if integration is None:
    print("✗ No active OpenAI IntegrationSettings found for account 'aws-admin'")
    sys.exit(1)

config = integration.config or {}
api_key = config.get('apiKey')
model = config.get('model', 'gpt-4o')

# Guard: a missing key would otherwise surface as three confusing 401 errors.
if not api_key:
    print("✗ OpenAI integration config has no 'apiKey' set")
    sys.exit(1)

print(f"🧪 Testing AI Model: {model}")
print("=" * 70)
print()

# Prompt deliberately asks for a long (~1500 word) article so that small
# max_tokens limits truncate the response (finish_reason == 'length').
test_prompt = """Write a comprehensive 1500-word article about "The Benefits of Organic Cotton Bedding" with these 7 sections:

1. Introduction (200 words)
2. Health and Skin Benefits (250 words)
3. Environmental Sustainability (250 words)
4. Quality and Durability (250 words)
5. Cost Analysis (200 words)
6. Buying Guide (250 words)
7. Conclusion (100 words)

Include specific examples, data points, and detailed comparisons in each section."""

tests = [
    {"name": "Test with max_tokens=4096", "max_tokens": 4096},
    {"name": "Test with max_tokens=8192", "max_tokens": 8192},
    {"name": "Test with max_tokens=16384", "max_tokens": 16384},
]

results = []

for test in tests:
    print(f"\n{test['name']}")
    print("-" * 70)

    try:
        response = requests.post(
            'https://api.openai.com/v1/chat/completions',
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json',
            },
            json={
                'model': model,
                'messages': [{'role': 'user', 'content': test_prompt}],
                'max_tokens': test['max_tokens'],
                'temperature': 0.7,
            },
            timeout=120
        )

        if response.status_code == 200:
            data = response.json()
            usage = data.get('usage', {})
            finish_reason = data['choices'][0].get('finish_reason', 'unknown')

            result = {
                'max_tokens': test['max_tokens'],
                'prompt_tokens': usage.get('prompt_tokens', 0),
                'completion_tokens': usage.get('completion_tokens', 0),
                'total_tokens': usage.get('total_tokens', 0),
                'finish_reason': finish_reason,
                'status': 'success'
            }
            results.append(result)

            print(f"✓ Response received")
            print(f"  Prompt Tokens: {result['prompt_tokens']}")
            print(f"  Completion Tokens: {result['completion_tokens']}")
            print(f"  Total Tokens: {result['total_tokens']}")
            print(f"  Finish Reason: {finish_reason}")

            if finish_reason == 'length':
                print(f"  ⚠️ TRUNCATED: Hit the max_tokens={test['max_tokens']} limit!")
            elif finish_reason == 'stop':
                print(f"  ✅ COMPLETE: Response finished naturally")
        else:
            print(f"✗ API Error: {response.status_code}")
            # The error body is not guaranteed to be JSON (gateways may return
            # HTML or an empty body) — don't let the diagnostics path raise.
            try:
                error_message = response.json().get('error', {}).get('message', 'Unknown error')
            except ValueError:
                error_message = response.text[:200] or 'Unknown error'
            print(f"  Error: {error_message}")
            results.append({
                'max_tokens': test['max_tokens'],
                'status': 'error',
                'error': error_message
            })

    except Exception as e:
        # Network failures / timeouts: record and continue with the next test.
        print(f"✗ Exception: {str(e)}")
        results.append({
            'max_tokens': test['max_tokens'],
            'status': 'exception',
            'error': str(e)
        })

# Print summary
print("\n" + "=" * 70)
print("📊 SUMMARY")
print("=" * 70)

success_results = [r for r in results if r.get('status') == 'success']

if len(success_results) >= 2:
    for result in success_results:
        status_icon = "⚠️ " if result['finish_reason'] == 'length' else "✅"
        print(f"{status_icon} max_tokens={result['max_tokens']:5d}: {result['completion_tokens']:5d} tokens generated ({result['finish_reason']})")
    print()

    # Compare first vs last test to decide whether the larger limit helped.
    if success_results[0]['finish_reason'] == 'length' and success_results[-1]['finish_reason'] == 'stop':
        print("✅ VERIFIED: Higher max_tokens allows longer, complete responses!")
        print(f"   Model can generate {success_results[-1]['completion_tokens']} tokens without truncation")
    elif success_results[-1]['completion_tokens'] > success_results[0]['max_tokens']:
        print(f"✅ VERIFIED: Model generated {success_results[-1]['completion_tokens']} tokens")
        print(f"   This exceeds the {success_results[0]['max_tokens']} limit from first test")
    else:
        print("ℹ️ All tests completed but responses were similar in length")
        print("   The prompt may not require more than ~4000 tokens")

print()
print("💡 RECOMMENDATION:")
# If any successful run truncated at a limit of 8192 or below, a larger
# budget is needed for long-form content generation.
if any(r.get('finish_reason') == 'length' and r.get('max_tokens', 0) <= 8192 for r in success_results):
    print("   Content generation needs max_tokens > 8192 for long articles (1200+ words)")
    print("   Suggest: Set max_tokens to 16384 for content_generation function")
else:
    print("   Current max_tokens=8192 appears sufficient for most content")