# igny8/backend/igny8_core/ai/prompts.py

"""
Prompt Registry - Centralized prompt management with override hierarchy
Supports: task-level overrides → DB prompts → default fallbacks
"""
import logging
from typing import Dict, Any, Optional
from django.db import models

logger = logging.getLogger(__name__)


class PromptRegistry:
    """
    Centralized prompt registry with hierarchical resolution:
    1. Task-level prompt_override (if exists)
    2. DB prompt for (account, function)
    3. Default fallback from registry
    """

    # Default prompts stored in registry
    DEFAULT_PROMPTS = {
        'clustering': """You are a semantic strategist and SEO architecture engine. Your task is to analyze the provided keyword list and group them into meaningful, intent-driven topic clusters that reflect how real users search, think, and act online.

Return a single JSON object with a "clusters" array. Each cluster must follow this structure:

{
  "name": "[Descriptive cluster name — natural, SEO-relevant, clearly expressing the topic]",
  "description": "[1–2 concise sentences explaining what this cluster covers and why these keywords belong together]",
  "keywords": ["keyword 1", "keyword 2", "keyword 3", "..."]
}

CLUSTERING STRATEGY:

1. Keyword-first, structure-follows:
   - Do NOT rely on assumed categories or existing content structures.
   - Begin purely from the meaning, intent, and behavioral connection between keywords.

2. Use multi-dimensional grouping logic:
   - Group keywords by these behavioral dimensions:
     • Search Intent → informational, commercial, transactional, navigational
     • Use-Case or Problem → what the user is trying to achieve or solve
     • Function or Feature → how something works or what it does
     • Persona or Audience → who the content or product serves
     • Context → location, time, season, platform, or device
   - Combine 2–3 dimensions naturally where they make sense.

3. Model real search behavior:
   - Favor clusters that form natural user journeys such as:
     • Problem ➝ Solution
     • General ➝ Specific
     • Product ➝ Use-case
     • Buyer ➝ Benefit
     • Tool ➝ Function
     • Task ➝ Method
   - Each cluster should feel like a real topic hub users would explore in depth.

4. Avoid superficial groupings:
   - Do not cluster keywords just because they share words.
   - Do not force-fit outliers or unrelated keywords.
   - Exclude keywords that don't logically connect to any cluster.

5. Quality rules:
   - Each cluster should include between 3–10 strongly related keywords.
   - Never duplicate a keyword across multiple clusters.
   - Prioritize semantic strength, search intent, and usefulness for SEO-driven content structure.
   - It's better to output fewer, high-quality clusters than many weak or shallow ones.

INPUT FORMAT:
{
  "keywords": [IGNY8_KEYWORDS]
}

OUTPUT FORMAT:
Return ONLY the final JSON object in this format:
{
  "clusters": [
    {
      "name": "...",
      "description": "...",
      "keywords": ["...", "...", "..."]
    }
  ]
}

Do not include any explanations, text, or commentary outside the JSON output.
""",

        'ideas': """Generate SEO-optimized, high-quality content ideas and outlines for each keyword cluster.
Input:
Clusters: [IGNY8_CLUSTERS]
Keywords: [IGNY8_CLUSTER_KEYWORDS]

Output: JSON with "ideas" array.
Each cluster → 1 cluster_hub + 2–4 supporting ideas.
Each idea must include:
title, description, content_type, content_structure, cluster_id, estimated_word_count (1500–2200), and covered_keywords.

Outline Rules:

Intro: 1 hook (30–40 words) + 2 intro paragraphs (50–60 words each).

5–8 H2 sections, each with 2–3 H3s.

Each H2 ≈ 250–300 words, mixed content (paragraphs, lists, tables, blockquotes).

Vary section format and tone; no bullets or lists at start.

Tables have columns; blockquotes = expert POV or data insight.

Use depth, examples, and real context.

Avoid repetitive structure.

Tone: Professional editorial flow. No generic phrasing. Use varied sentence openings and realistic examples.

Output JSON Example:

{
  "ideas": [
    {
      "title": "Best Organic Cotton Duvet Covers for All Seasons",
      "description": {
        "introduction": {
          "hook": "Transform your sleep with organic cotton that blends comfort and sustainability.",
          "paragraphs": [
            {"content_type": "paragraph", "details": "Overview of organic cotton's rise in bedding industry."},
            {"content_type": "paragraph", "details": "Why consumers prefer organic bedding over synthetic alternatives."}
          ]
        },
        "H2": [
          {
            "heading": "Why Choose Organic Cotton for Bedding?",
            "subsections": [
              {"subheading": "Health and Skin Benefits", "content_type": "paragraph", "details": "Discuss hypoallergenic and chemical-free aspects."},
              {"subheading": "Environmental Sustainability", "content_type": "list", "details": "Eco benefits like low water use, no pesticides."},
              {"subheading": "Long-Term Cost Savings", "content_type": "table", "details": "Compare durability and pricing over time."}
            ]
          }
        ]
      },
      "content_type": "post",
      "content_structure": "review",
      "cluster_id": 12,
      "estimated_word_count": 1800,
      "covered_keywords": "organic duvet covers, eco-friendly bedding, sustainable sheets"
    }
  ]
}""",

        'content_generation': """You are an editorial content strategist. Your task is to generate a complete JSON response object that includes all the fields listed below, based on the provided content idea, keyword cluster, and keyword list.

Only the `content` field should contain HTML inside JSON object.

==================
Generate a complete JSON response object matching this structure:
==================

{
  "title": "[Blog title using the primary keyword — full sentence case]",
  "meta_title": "[Meta title under 60 characters — natural, optimized, and compelling]",
  "meta_description": "[Meta description under 160 characters — clear and enticing summary]",
  "content": "[HTML content — full editorial structure with <p>, <h2>, <h3>, <ul>, <ol>, <table>]",
  "word_count": [Exact integer — word count of HTML body only],
  "primary_keyword": "[Single primary keyword used in title and first paragraph]",
  "secondary_keywords": [
    "[Keyword 1]",
    "[Keyword 2]",
    "[Keyword 3]"
  ],
  "tags": [
    "[2–4 word lowercase tag 1]",
    "[2–4 word lowercase tag 2]",
    "[2–4 word lowercase tag 3]",
    "[2–4 word lowercase tag 4]",
    "[2–4 word lowercase tag 5]"
  ],
  "categories": [
    "[Parent Category > Child Category]",
    "[Optional Second Category > Optional Subcategory]"
  ]
}

===========================
CONTENT FLOW RULES
===========================

**INTRODUCTION:**
- Start with 1 italicized hook (30–40 words)
- Follow with 2 narrative paragraphs (each 50–60 words; 2–3 sentences max)
- No headings allowed in intro

**H2 SECTIONS (5–8 total):**
Each section should be 250–300 words and follow this format:
1. Two narrative paragraphs (80–120 words each, 2–3 sentences)
2. One list or table (must come *after* a paragraph)
3. Optional closing paragraph (40–60 words)
4. Insert 2–3  subsections naturally after main paragraphs

**Formatting Rules:**
- Vary use of unordered lists, ordered lists, and tables across sections
- Never begin any section or sub-section with a list or table

===========================
KEYWORD & SEO RULES
===========================

- **Primary keyword** must appear in:
  - The title
  - First paragraph of the introduction
  - At least 2 H2 headings

- **Secondary keywords** must be used naturally, not forced

- **Tone & style guidelines:**
  - No robotic or passive voice
  - Avoid generic intros like "In today's world…"
  - Don't repeat heading in opening sentence
  - Vary sentence structure and length


===========================
INPUT VARIABLES
===========================

CONTENT IDEA DETAILS:
[IGNY8_IDEA]

KEYWORD CLUSTER:
[IGNY8_CLUSTER]

ASSOCIATED KEYWORDS:
[IGNY8_KEYWORDS]

===========================
OUTPUT FORMAT
===========================

Return ONLY the final JSON object.
Do NOT include any comments, formatting, or explanations.""",

        'image_prompt_extraction': """Extract image prompts from the following article content.

ARTICLE TITLE: {title}

ARTICLE CONTENT:
{content}

Extract image prompts for:
1. Featured Image: One main image that represents the article topic
2. In-Article Images: Up to {max_images} images that would be useful within the article content

Return a JSON object with this structure:
{{
    "featured_prompt": "Detailed description of the featured image",
    "in_article_prompts": [
        "Description of first in-article image",
        "Description of second in-article image",
        ...
    ]
}}

Make sure each prompt is detailed enough for image generation, describing the visual elements, style, mood, and composition.""",

        'image_prompt_template': 'Create a high-quality {image_type} image to use as a featured photo for a blog post titled "{post_title}". The image should visually represent the theme, mood, and subject implied by the image prompt: {image_prompt}. Focus on a realistic, well-composed scene that naturally communicates the topic without text or logos. Use balanced lighting, pleasing composition, and photographic detail suitable for lifestyle or editorial web content. Avoid adding any visible or readable text, brand names, or illustrative effects. **And make sure image is not blurry.**',

        'negative_prompt': 'text, watermark, logo, overlay, title, caption, writing on walls, writing on objects, UI, infographic elements, post title',
    }

    # Mapping from AI function names to the prompt-type key used both for
    # DEFAULT_PROMPTS and for the prompt_type filter of the DB lookup.
    # get_prompt() falls back to the function name itself when a name is not
    # listed here, so prompt types can also be requested directly.
    # Note that the three image-related function names all resolve to the
    # same 'image_prompt_extraction' prompt.
    FUNCTION_TO_PROMPT_TYPE = {
        'auto_cluster': 'clustering',
        'generate_ideas': 'ideas',
        'generate_content': 'content_generation',
        'generate_images': 'image_prompt_extraction',
        'extract_image_prompts': 'image_prompt_extraction',
        'generate_image_prompts': 'image_prompt_extraction',
    }

    @classmethod
    def get_prompt(
        cls,
        function_name: str,
        account: Optional[Any] = None,
        task: Optional[Any] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Get prompt for a function with hierarchical resolution.

        Priority:
        1. task.prompt_override (if task provided and has a non-empty override)
        2. Active DB prompt for (account, prompt type)
        3. Default fallback from DEFAULT_PROMPTS

        Whichever template is selected is passed through _render_prompt()
        together with ``context`` before being returned.

        The DB lookup is best-effort: any failure falls through to the
        default prompt. A missing row is logged at debug level; every other
        failure (import error, database error, multiple matching rows) is
        logged as a warning with traceback so it is not mistaken for
        "no prompt configured".

        Args:
            function_name: AI function name (e.g., 'auto_cluster', 'generate_ideas').
                Names not present in FUNCTION_TO_PROMPT_TYPE are used as the
                prompt type directly.
            account: Account object used for the DB lookup (optional)
            task: Task object with optional prompt_override (optional)
            context: Additional context for prompt rendering (optional)

        Returns:
            Rendered prompt string; an empty string when no default exists
            for the resolved prompt type.
        """
        render_context = context or {}

        # Step 1: Check task-level override
        override = getattr(task, 'prompt_override', None) if task else None
        if override:
            logger.info("Using task-level prompt override for %s", function_name)
            return cls._render_prompt(override, render_context)

        # Step 2: Get prompt type (unmapped names are used verbatim)
        prompt_type = cls.FUNCTION_TO_PROMPT_TYPE.get(function_name, function_name)

        # Step 3: Try DB prompt
        if account:
            # Imported locally, like AIPrompt below, so that this module does
            # not need the ORM until a DB lookup is actually requested.
            from django.core.exceptions import ObjectDoesNotExist

            try:
                from igny8_core.modules.system.models import AIPrompt
                db_prompt = AIPrompt.objects.get(
                    account=account,
                    prompt_type=prompt_type,
                    is_active=True
                )
            except ObjectDoesNotExist:
                # Expected case: the account has not customised this prompt.
                logger.debug(
                    "No DB prompt found for %s (prompt_type=%s)",
                    function_name, prompt_type
                )
            except Exception:
                # Unexpected failure: keep the best-effort fallback, but make
                # the problem visible instead of hiding it at debug level.
                logger.warning(
                    "DB prompt lookup failed for %s (prompt_type=%s); "
                    "falling back to default prompt",
                    function_name, prompt_type, exc_info=True
                )
            else:
                # Rendering happens outside the try block so that a rendering
                # problem is never misreported as a missing DB prompt.
                logger.info(
                    "Using DB prompt for %s (account %s)",
                    function_name, getattr(account, 'id', None)
                )
                return cls._render_prompt(db_prompt.prompt_value, render_context)

        # Step 4: Use default fallback
        prompt = cls.DEFAULT_PROMPTS.get(prompt_type, '')
        if not prompt:
            logger.warning(
                "No default prompt found for %s, using empty string", prompt_type
            )

        return cls._render_prompt(prompt, render_context)

    @classmethod
    def _render_prompt(cls, prompt_template: str, context: Dict[str, Any]) -> str:
        """
        Render prompt template with context variables.
        Supports both .format() style ({variable}) and placeholder replacement ([IGNY8_*]).

        Args:
            prompt_template: Prompt template string
            context: Context variables for rendering

        Returns:
            Rendered prompt string
        """
        if not context:
            return prompt_template

        rendered = prompt_template

        # Step 1: Replace [IGNY8_*] placeholders first (always do this)
        for key, value in context.items():
            placeholder = f'[IGNY8_{key.upper()}]'
            if placeholder in rendered:
                rendered = rendered.replace(placeholder, str(value))
                logger.debug(f"Replaced placeholder {placeholder} with {len(str(value))} characters")

        # Step 2: Try .format() style for {variable} placeholders (if any remain)
        # Normalize context keys - convert UPPER to lowercase for .format()
        normalized_context = {}
        for key, value in context.items():
            # Try both original key and lowercase version
            normalized_context[key] = value
            normalized_context[key.lower()] = value

        # Only try .format() if there are {variable} placeholders
        if '{' in rendered and '}' in rendered:
            try:
                rendered = rendered.format(**normalized_context)
            except (KeyError, ValueError) as e:
                # If .format() fails, log warning but keep the [IGNY8_*] replacements
                logger.warning(f"Failed to format prompt with .format(): {e}. Using [IGNY8_*] replacements only.")

        return rendered

    @classmethod
    def get_image_prompt_template(cls, account: Optional[Any] = None) -> str:
        """
        Get image prompt template.

        Uses the account's active DB prompt of type 'image_prompt_template'
        when an account is given and the lookup succeeds; otherwise the
        registry default. The DB lookup is best-effort: a missing row is
        logged at debug level, any other failure as a warning with traceback,
        and in both cases the default is returned.

        Args:
            account: Account object used for the DB lookup (optional)

        Returns:
            Template string (not rendered) - caller should format with .format()
        """
        prompt_type = 'image_prompt_template'

        # Try DB prompt
        if account:
            # Imported locally, like AIPrompt below, so that this module does
            # not need the ORM until a DB lookup is actually requested.
            from django.core.exceptions import ObjectDoesNotExist

            try:
                from igny8_core.modules.system.models import AIPrompt
                db_prompt = AIPrompt.objects.get(
                    account=account,
                    prompt_type=prompt_type,
                    is_active=True
                )
            except ObjectDoesNotExist:
                # Expected case: no account-specific template configured.
                logger.debug("No DB prompt found for %s", prompt_type)
            except Exception:
                # Unexpected failure: still fall back, but do not hide it.
                logger.warning(
                    "DB prompt lookup failed for %s; falling back to default prompt",
                    prompt_type, exc_info=True
                )
            else:
                return db_prompt.prompt_value

        # Use default
        return cls.DEFAULT_PROMPTS.get(prompt_type, '')

    @classmethod
    def get_negative_prompt(cls, account: Optional[Any] = None) -> str:
        """
        Get negative prompt.

        Uses the account's active DB prompt of type 'negative_prompt' when an
        account is given and the lookup succeeds; otherwise the registry
        default. The DB lookup is best-effort: a missing row is logged at
        debug level, any other failure as a warning with traceback, and in
        both cases the default is returned.

        Args:
            account: Account object used for the DB lookup (optional)

        Returns:
            Template string (not rendered).
        """
        prompt_type = 'negative_prompt'

        # Try DB prompt
        if account:
            # Imported locally, like AIPrompt below, so that this module does
            # not need the ORM until a DB lookup is actually requested.
            from django.core.exceptions import ObjectDoesNotExist

            try:
                from igny8_core.modules.system.models import AIPrompt
                db_prompt = AIPrompt.objects.get(
                    account=account,
                    prompt_type=prompt_type,
                    is_active=True
                )
            except ObjectDoesNotExist:
                # Expected case: no account-specific negative prompt configured.
                logger.debug("No DB prompt found for %s", prompt_type)
            except Exception:
                # Unexpected failure: still fall back, but do not hide it.
                logger.warning(
                    "DB prompt lookup failed for %s; falling back to default prompt",
                    prompt_type, exc_info=True
                )
            else:
                return db_prompt.prompt_value

        # Use default
        return cls.DEFAULT_PROMPTS.get(prompt_type, '')


# Convenience function for backward compatibility
def get_prompt(function_name: str, account=None, task=None, context=None) -> str:
    """
    Module-level shortcut for PromptRegistry.get_prompt().

    Forwards all four arguments unchanged and returns the registry's result,
    so existing callers can keep using a plain function.
    """
    resolve = PromptRegistry.get_prompt
    return resolve(
        function_name,
        account=account,
        task=task,
        context=context,
    )