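image gen mess: add DALL-E 3 style/quality parameters, fix Runware Bria/Google request parameters, surface detailed Runware error messages, and map image styles to prompt descriptions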

2026-01-03 22:31:30 +00:00
parent f518e1751b
commit c4de8994dd
9 changed files with 453 additions and 221 deletions
--- a/backend/igny8_core/ai/ai_core.py
+++ b/backend/igny8_core/ai/ai_core.py
@@ -722,7 +722,8 @@ class AICore:
        n: int = 1,
        api_key: Optional[str] = None,
        negative_prompt: Optional[str] = None,
-        function_name: str = 'generate_image'
+        function_name: str = 'generate_image',
+        style: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Generate image using AI with console logging.
@@ -743,7 +744,7 @@ class AICore:
        print(f"[AI][{function_name}] Step 1: Preparing image generation request...")
        
        if provider == 'openai':
-            return self._generate_image_openai(prompt, model, size, n, api_key, negative_prompt, function_name)
+            return self._generate_image_openai(prompt, model, size, n, api_key, negative_prompt, function_name, style)
        elif provider == 'runware':
            return self._generate_image_runware(prompt, model, size, n, api_key, negative_prompt, function_name)
        elif provider == 'bria':
@@ -767,9 +768,15 @@ class AICore:
        n: int,
        api_key: Optional[str],
        negative_prompt: Optional[str],
-        function_name: str
+        function_name: str,
+        style: Optional[str] = None
    ) -> Dict[str, Any]:
-        """Generate image using OpenAI DALL-E"""
+        """Generate image using OpenAI DALL-E
+        
+        Args:
+            style: For DALL-E 3 only. 'vivid' (hyper-real/dramatic) or 'natural' (more realistic).
+                   Default is 'natural' for realistic photos.
+        """
        print(f"[AI][{function_name}] Provider: OpenAI")
        
        # Determine character limit based on model
@@ -854,6 +861,15 @@ class AICore:
            'size': size
        }
        
+        # For DALL-E 3, add style parameter
+        # 'natural' = more realistic photos, 'vivid' = hyper-real/dramatic
+        if model == 'dall-e-3':
+            # Default to 'natural' for realistic images, but respect user preference
+            dalle_style = style if style in ['vivid', 'natural'] else 'natural'
+            data['style'] = dalle_style
+            data['quality'] = 'hd'  # Always use HD quality for best results
+            print(f"[AI][{function_name}] DALL-E 3 style: {dalle_style}, quality: hd")
+        
        if negative_prompt:
            # Note: OpenAI DALL-E doesn't support negative_prompt in API, but we log it
            print(f"[AI][{function_name}] Note: Negative prompt provided but OpenAI DALL-E doesn't support it")
@@ -998,8 +1014,11 @@ class AICore:
        
        # Model-specific parameter configuration based on Runware documentation
        if runware_model.startswith('bria:'):
-            # Bria 3.2 (bria:10@1) - Commercial-ready, steps 4-10 (default 8)
-            inference_task['steps'] = 8
+            # Bria 3.2 (bria:10@1) - Commercial-ready, steps 20-50 (API requires minimum 20)
+            inference_task['steps'] = 20
+            # Enhanced negative prompt for Bria to prevent disfigured images
+            enhanced_negative = (negative_prompt or '') + ', disfigured, deformed, bad anatomy, wrong anatomy, extra limbs, missing limbs, floating limbs, mutated hands, extra fingers, missing fingers, fused fingers, poorly drawn hands, poorly drawn face, mutation, ugly, blurry, low quality, worst quality, jpeg artifacts, watermark, text, signature'
+            inference_task['negativePrompt'] = enhanced_negative
            # Bria provider settings for enhanced quality
            inference_task['providerSettings'] = {
                'bria': {
@@ -1009,12 +1028,15 @@ class AICore:
                    'contentModeration': True
                }
            }
-            print(f"[AI][{function_name}] Using Bria 3.2 config: steps=8, providerSettings enabled")
+            print(f"[AI][{function_name}] Using Bria 3.2 config: steps=20, enhanced negative prompt, providerSettings enabled")
        elif runware_model.startswith('google:'):
-            # Nano Banana (google:4@2) - Premium quality, no explicit steps needed
-            # Google models handle steps internally
+            # Nano Banana (google:4@2) - Premium quality
+            # Google models use 'resolution' parameter INSTEAD of width/height
+            # Remove width/height and use resolution only
+            del inference_task['width']
+            del inference_task['height']
            inference_task['resolution'] = '1k'  # Use 1K tier for optimal speed/quality
-            print(f"[AI][{function_name}] Using Nano Banana config: resolution=1k")
+            print(f"[AI][{function_name}] Using Nano Banana config: resolution=1k (no width/height)")
        else:
            # Hi Dream Full (runware:97@1) - General diffusion, steps 20, CFGScale 7
            inference_task['steps'] = 20
@@ -1036,7 +1058,29 @@ class AICore:
            print(f"[AI][{function_name}] Step 4: Received response in {request_duration:.2f}s (status={response.status_code})")
            
            if response.status_code != 200:
-                error_msg = f"HTTP {response.status_code} error"
+                # Log the full error response for debugging
+                try:
+                    error_body = response.json()
+                    print(f"[AI][{function_name}][Error] Runware error response: {error_body}")
+                    logger.error(f"[AI][{function_name}] Runware HTTP {response.status_code} error body: {error_body}")
+                    
+                    # Extract specific error message from Runware response
+                    error_detail = None
+                    if isinstance(error_body, list):
+                        for item in error_body:
+                            if isinstance(item, dict) and 'errors' in item:
+                                errors = item['errors']
+                                if isinstance(errors, list) and len(errors) > 0:
+                                    err = errors[0]
+                                    error_detail = err.get('message') or err.get('error') or str(err)
+                                    break
+                    elif isinstance(error_body, dict):
+                        error_detail = error_body.get('message') or error_body.get('error') or str(error_body)
+                    
+                    error_msg = f"HTTP {response.status_code}: {error_detail}" if error_detail else f"HTTP {response.status_code} error"
+                except Exception as e:
+                    error_msg = f"HTTP {response.status_code} error (could not parse response: {e})"
+                
                print(f"[AI][{function_name}][Error] {error_msg}")
                return {
                    'url': None,
--- a/backend/igny8_core/ai/tasks.py
+++ b/backend/igny8_core/ai/tasks.py
@@ -218,17 +218,41 @@ def process_image_generation_queue(self, image_ids: list, account_id: int = None
    image_type = config.get('image_type') or global_settings.image_style
    image_format = config.get('image_format', 'webp')
    
+    # Style to prompt enhancement mapping
+    # These style descriptors are added to the image prompt for better results
+    STYLE_PROMPT_MAP = {
+        # Runware styles
+        'photorealistic': 'ultra realistic photography, natural lighting, real world look, photorealistic',
+        'illustration': 'digital illustration, clean lines, artistic style, modern illustration',
+        '3d_render': 'computer generated 3D render, modern polished 3D style, depth and dramatic lighting',
+        'minimal_flat': 'minimal flat design, simple shapes, flat colors, modern graphic design aesthetic',
+        'artistic': 'artistic painterly style, expressive brushstrokes, hand painted aesthetic',
+        'cartoon': 'cartoon stylized illustration, playful exaggerated forms, animated character style',
+        # DALL-E styles (keys are the values accepted by the OpenAI API `style` parameter)
+        'natural': 'natural realistic style',
+        'vivid': 'vivid dramatic hyper-realistic style',
+        # Legacy fallbacks
+        'realistic': 'ultra realistic photography, natural lighting, photorealistic',
+    }
+    
+    # Get the style description for prompt enhancement
+    style_description = STYLE_PROMPT_MAP.get(image_type, STYLE_PROMPT_MAP.get('photorealistic'))
+    logger.info(f"[process_image_generation_queue] Style: {image_type} -> prompt enhancement: {style_description[:50]}...")
+    
    # Model-specific landscape sizes (square is always 1024x1024)
-    # Based on Runware documentation for optimal results per model
+    # For Runware models - based on Runware documentation for optimal results per model
+    # For OpenAI DALL-E 3 - uses 1792x1024 for landscape
    MODEL_LANDSCAPE_SIZES = {
        'runware:97@1': '1280x768',   # Hi Dream Full landscape
        'bria:10@1': '1344x768',       # Bria 3.2 landscape (16:9)
        'google:4@2': '1376x768',      # Nano Banana landscape (16:9)
+        'dall-e-3': '1792x1024',       # DALL-E 3 landscape
+        'dall-e-2': '1024x1024',       # DALL-E 2 only supports square
    }
    DEFAULT_SQUARE_SIZE = '1024x1024'
    
    # Get model-specific landscape size for featured images
-    model_landscape_size = MODEL_LANDSCAPE_SIZES.get(model, '1280x768')
+    model_landscape_size = MODEL_LANDSCAPE_SIZES.get(model, '1792x1024' if provider == 'openai' else '1280x768')
    
    # Featured image always uses model-specific landscape size
    featured_image_size = model_landscape_size
@@ -398,7 +422,7 @@ def process_image_generation_queue(self, image_ids: list, account_id: int = None
                    # Calculate actual template length with placeholders filled
                    # Format template with dummy values to measure actual length
                    template_with_dummies = image_prompt_template.format(
-                        image_type=image_type,
+                        image_type=style_description,  # Use actual style description length
                        post_title='X' * len(post_title),  # Use same length as actual post_title
                        image_prompt=''  # Empty to measure template overhead
                    )
@@ -425,7 +449,7 @@ def process_image_generation_queue(self, image_ids: list, account_id: int = None
                            image_prompt = image_prompt[:max_image_prompt_length - 3] + "..."
                    
                    formatted_prompt = image_prompt_template.format(
-                        image_type=image_type,
+                        image_type=style_description,  # Use full style description instead of raw value
                        post_title=post_title,
                        image_prompt=image_prompt
                    )
@@ -510,6 +534,21 @@ def process_image_generation_queue(self, image_ids: list, account_id: int = None
            else:  # desktop or other (legacy)
                image_size = in_article_square_size  # Default to square
            
+            # For DALL-E, convert image_type to style parameter
+            # image_type is from user settings (e.g., 'vivid', 'natural', 'realistic')
+            # DALL-E accepts 'vivid' or 'natural' - map accordingly
+            dalle_style = None
+            if provider == 'openai':
+                # Map image_type to DALL-E style
+                # 'natural' = more realistic photos (default)
+                # 'vivid' = hyper-real, dramatic images
+                if image_type in ['vivid']:
+                    dalle_style = 'vivid'
+                else:
+                    # Default to 'natural' for realistic photos
+                    dalle_style = 'natural'
+                logger.info(f"[process_image_generation_queue] DALL-E style: {dalle_style} (from image_type: {image_type})")
+            
            result = ai_core.generate_image(
                prompt=formatted_prompt,
                provider=provider,
@@ -517,7 +556,8 @@ def process_image_generation_queue(self, image_ids: list, account_id: int = None
                size=image_size,
                api_key=api_key,
                negative_prompt=negative_prompt,
-                function_name='generate_images_from_prompts'
+                function_name='generate_images_from_prompts',
+                style=dalle_style
            )
            
            # Update progress: Image generation complete (50%)