Files
igny8/backend/igny8_core/ai/prompts.py

432 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Prompt Registry - Centralized prompt management with override hierarchy
Supports: task-level overrides → DB prompts → default fallbacks
"""
import logging
from typing import Dict, Any, Optional
from django.db import models
logger = logging.getLogger(__name__)
class PromptRegistry:
"""
Centralized prompt registry with hierarchical resolution:
1. Task-level prompt_override (if exists)
2. DB prompt for (account, function)
3. Default fallback from registry
"""
# Default prompts stored in registry
DEFAULT_PROMPTS = {
'clustering': """You are a semantic strategist and SEO architecture engine. Your task is to analyze the provided keyword list and group them into meaningful, intent-driven topic clusters that reflect how real users search, think, and act online.
Return a single JSON object with a "clusters" array. Each cluster must follow this structure:
{
"name": "[Descriptive cluster name — natural, SEO-relevant, clearly expressing the topic]",
"description": "[12 concise sentences explaining what this cluster covers and why these keywords belong together]",
"keywords": ["keyword 1", "keyword 2", "keyword 3", "..."]
}
CLUSTERING STRATEGY:
1. Keyword-first, structure-follows:
- Do NOT rely on assumed categories or existing content structures.
- Begin purely from the meaning, intent, and behavioral connection between keywords.
2. Use multi-dimensional grouping logic:
- Group keywords by these behavioral dimensions:
• Search Intent → informational, commercial, transactional, navigational
• Use-Case or Problem → what the user is trying to achieve or solve
• Function or Feature → how something works or what it does
• Persona or Audience → who the content or product serves
• Context → location, time, season, platform, or device
- Combine 23 dimensions naturally where they make sense.
3. Model real search behavior:
- Favor clusters that form natural user journeys such as:
• Problem ➝ Solution
• General ➝ Specific
• Product ➝ Use-case
• Buyer ➝ Benefit
• Tool ➝ Function
• Task ➝ Method
- Each cluster should feel like a real topic hub users would explore in depth.
4. Avoid superficial groupings:
- Do not cluster keywords just because they share words.
- Do not force-fit outliers or unrelated keywords.
- Exclude keywords that don't logically connect to any cluster.
5. Quality rules:
- Each cluster should include between 310 strongly related keywords.
- Never duplicate a keyword across multiple clusters.
- Prioritize semantic strength, search intent, and usefulness for SEO-driven content structure.
- It's better to output fewer, high-quality clusters than many weak or shallow ones.
INPUT FORMAT:
{
"keywords": [IGNY8_KEYWORDS]
}
OUTPUT FORMAT:
Return ONLY the final JSON object in this format:
{
"clusters": [
{
"name": "...",
"description": "...",
"keywords": ["...", "...", "..."]
}
]
}
Do not include any explanations, text, or commentary outside the JSON output.
""",
'ideas': """Generate SEO-optimized, high-quality content ideas and outlines for each keyword cluster.
Input:
Clusters: [IGNY8_CLUSTERS]
Keywords: [IGNY8_CLUSTER_KEYWORDS]
Output: JSON with "ideas" array.
Each cluster → 1 cluster_hub + 24 supporting ideas.
Each idea must include:
title, description, content_type, content_structure, cluster_id, estimated_word_count (15002200), and covered_keywords.
Outline Rules:
Intro: 1 hook (3040 words) + 2 intro paragraphs (5060 words each).
58 H2 sections, each with 23 H3s.
Each H2 ≈ 250300 words, mixed content (paragraphs, lists, tables, blockquotes).
Vary section format and tone; no bullets or lists at start.
Tables have columns; blockquotes = expert POV or data insight.
Use depth, examples, and real context.
Avoid repetitive structure.
Tone: Professional editorial flow. No generic phrasing. Use varied sentence openings and realistic examples.
Output JSON Example:
{
"ideas": [
{
"title": "Best Organic Cotton Duvet Covers for All Seasons",
"description": {
"introduction": {
"hook": "Transform your sleep with organic cotton that blends comfort and sustainability.",
"paragraphs": [
{"content_type": "paragraph", "details": "Overview of organic cotton's rise in bedding industry."},
{"content_type": "paragraph", "details": "Why consumers prefer organic bedding over synthetic alternatives."}
]
},
"H2": [
{
"heading": "Why Choose Organic Cotton for Bedding?",
"subsections": [
{"subheading": "Health and Skin Benefits", "content_type": "paragraph", "details": "Discuss hypoallergenic and chemical-free aspects."},
{"subheading": "Environmental Sustainability", "content_type": "list", "details": "Eco benefits like low water use, no pesticides."},
{"subheading": "Long-Term Cost Savings", "content_type": "table", "details": "Compare durability and pricing over time."}
]
}
]
},
"content_type": "post",
"content_structure": "review",
"cluster_id": 12,
"estimated_word_count": 1800,
"covered_keywords": "organic duvet covers, eco-friendly bedding, sustainable sheets"
}
]
}""",
'content_generation': """You are an editorial content strategist. Your task is to generate a complete JSON response object that includes all the fields listed below, based on the provided content idea, keyword cluster, and keyword list.
Only the `content` field should contain HTML inside JSON object.
==================
Generate a complete JSON response object matching this structure:
==================
{
"title": "[Blog title using the primary keyword — full sentence case]",
"meta_title": "[Meta title under 60 characters — natural, optimized, and compelling]",
"meta_description": "[Meta description under 160 characters — clear and enticing summary]",
"content": "[HTML content — full editorial structure with <p>, <h2>, <h3>, <ul>, <ol>, <table>]",
"word_count": [Exact integer — word count of HTML body only],
"primary_keyword": "[Single primary keyword used in title and first paragraph]",
"secondary_keywords": [
"[Keyword 1]",
"[Keyword 2]",
"[Keyword 3]"
],
"tags": [
"[24 word lowercase tag 1]",
"[24 word lowercase tag 2]",
"[24 word lowercase tag 3]",
"[24 word lowercase tag 4]",
"[24 word lowercase tag 5]"
],
"categories": [
"[Parent Category > Child Category]",
"[Optional Second Category > Optional Subcategory]"
]
}
===========================
CONTENT FLOW RULES
===========================
**INTRODUCTION:**
- Start with 1 italicized hook (3040 words)
- Follow with 2 narrative paragraphs (each 5060 words; 23 sentences max)
- No headings allowed in intro
**H2 SECTIONS (58 total):**
Each section should be 250300 words and follow this format:
1. Two narrative paragraphs (80120 words each, 23 sentences)
2. One list or table (must come *after* a paragraph)
3. Optional closing paragraph (4060 words)
4. Insert 23 subsections naturally after main paragraphs
**Formatting Rules:**
- Vary use of unordered lists, ordered lists, and tables across sections
- Never begin any section or sub-section with a list or table
===========================
KEYWORD & SEO RULES
===========================
- **Primary keyword** must appear in:
- The title
- First paragraph of the introduction
- At least 2 H2 headings
- **Secondary keywords** must be used naturally, not forced
- **Tone & style guidelines:**
- No robotic or passive voice
- Avoid generic intros like "In today's world…"
- Don't repeat heading in opening sentence
- Vary sentence structure and length
===========================
INPUT VARIABLES
===========================
CONTENT IDEA DETAILS:
[IGNY8_IDEA]
KEYWORD CLUSTER:
[IGNY8_CLUSTER]
ASSOCIATED KEYWORDS:
[IGNY8_KEYWORDS]
===========================
OUTPUT FORMAT
===========================
Return ONLY the final JSON object.
Do NOT include any comments, formatting, or explanations.""",
'image_prompt_extraction': """Extract image prompts from the following article content.
ARTICLE TITLE: {title}
ARTICLE CONTENT:
{content}
Extract image prompts for:
1. Featured Image: One main image that represents the article topic
2. In-Article Images: Up to {max_images} images that would be useful within the article content
Return a JSON object with this structure:
{{
"featured_prompt": "Detailed description of the featured image",
"in_article_prompts": [
"Description of first in-article image",
"Description of second in-article image",
...
]
}}
Make sure each prompt is detailed enough for image generation, describing the visual elements, style, mood, and composition.""",
'image_prompt_template': 'Create a high-quality {image_type} image to use as a featured photo for a blog post titled "{post_title}". The image should visually represent the theme, mood, and subject implied by the image prompt: {image_prompt}. Focus on a realistic, well-composed scene that naturally communicates the topic without text or logos. Use balanced lighting, pleasing composition, and photographic detail suitable for lifestyle or editorial web content. Avoid adding any visible or readable text, brand names, or illustrative effects. **And make sure image is not blurry.**',
'negative_prompt': 'text, watermark, logo, overlay, title, caption, writing on walls, writing on objects, UI, infographic elements, post title',
}
# Mapping from function names to prompt types
FUNCTION_TO_PROMPT_TYPE = {
'auto_cluster': 'clustering',
'generate_ideas': 'ideas',
'generate_content': 'content_generation',
'generate_images': 'image_prompt_extraction',
'extract_image_prompts': 'image_prompt_extraction',
}
@classmethod
def get_prompt(
cls,
function_name: str,
account: Optional[Any] = None,
task: Optional[Any] = None,
context: Optional[Dict[str, Any]] = None
) -> str:
"""
Get prompt for a function with hierarchical resolution.
Priority:
1. task.prompt_override (if task provided and has override)
2. DB prompt for (account, function)
3. Default fallback from registry
Args:
function_name: AI function name (e.g., 'auto_cluster', 'generate_ideas')
account: Account object (optional)
task: Task object with optional prompt_override (optional)
context: Additional context for prompt rendering (optional)
Returns:
Prompt string ready for formatting
"""
# Step 1: Check task-level override
if task and hasattr(task, 'prompt_override') and task.prompt_override:
logger.info(f"Using task-level prompt override for {function_name}")
prompt = task.prompt_override
return cls._render_prompt(prompt, context or {})
# Step 2: Get prompt type
prompt_type = cls.FUNCTION_TO_PROMPT_TYPE.get(function_name, function_name)
# Step 3: Try DB prompt
if account:
try:
from igny8_core.modules.system.models import AIPrompt
db_prompt = AIPrompt.objects.get(
account=account,
prompt_type=prompt_type,
is_active=True
)
logger.info(f"Using DB prompt for {function_name} (account {account.id})")
prompt = db_prompt.prompt_value
return cls._render_prompt(prompt, context or {})
except Exception as e:
logger.debug(f"No DB prompt found for {function_name}: {e}")
# Step 4: Use default fallback
prompt = cls.DEFAULT_PROMPTS.get(prompt_type, '')
if not prompt:
logger.warning(f"No default prompt found for {prompt_type}, using empty string")
return cls._render_prompt(prompt, context or {})
@classmethod
def _render_prompt(cls, prompt_template: str, context: Dict[str, Any]) -> str:
"""
Render prompt template with context variables.
Supports both .format() style ({variable}) and placeholder replacement ([IGNY8_*]).
Args:
prompt_template: Prompt template string
context: Context variables for rendering
Returns:
Rendered prompt string
"""
if not context:
return prompt_template
rendered = prompt_template
# Step 1: Replace [IGNY8_*] placeholders first (always do this)
for key, value in context.items():
placeholder = f'[IGNY8_{key.upper()}]'
if placeholder in rendered:
rendered = rendered.replace(placeholder, str(value))
logger.debug(f"Replaced placeholder {placeholder} with {len(str(value))} characters")
# Step 2: Try .format() style for {variable} placeholders (if any remain)
# Normalize context keys - convert UPPER to lowercase for .format()
normalized_context = {}
for key, value in context.items():
# Try both original key and lowercase version
normalized_context[key] = value
normalized_context[key.lower()] = value
# Only try .format() if there are {variable} placeholders
if '{' in rendered and '}' in rendered:
try:
rendered = rendered.format(**normalized_context)
except (KeyError, ValueError) as e:
# If .format() fails, log warning but keep the [IGNY8_*] replacements
logger.warning(f"Failed to format prompt with .format(): {e}. Using [IGNY8_*] replacements only.")
return rendered
@classmethod
def get_image_prompt_template(cls, account: Optional[Any] = None) -> str:
"""
Get image prompt template.
Returns template string (not rendered) - caller should format with .format()
"""
prompt_type = 'image_prompt_template'
# Try DB prompt
if account:
try:
from igny8_core.modules.system.models import AIPrompt
db_prompt = AIPrompt.objects.get(
account=account,
prompt_type=prompt_type,
is_active=True
)
return db_prompt.prompt_value
except Exception:
pass
# Use default
return cls.DEFAULT_PROMPTS.get(prompt_type, '')
@classmethod
def get_negative_prompt(cls, account: Optional[Any] = None) -> str:
"""
Get negative prompt.
Returns template string (not rendered).
"""
prompt_type = 'negative_prompt'
# Try DB prompt
if account:
try:
from igny8_core.modules.system.models import AIPrompt
db_prompt = AIPrompt.objects.get(
account=account,
prompt_type=prompt_type,
is_active=True
)
return db_prompt.prompt_value
except Exception:
pass
# Use default
return cls.DEFAULT_PROMPTS.get(prompt_type, '')
# Convenience function for backward compatibility
def get_prompt(function_name: str, account=None, task=None, context=None) -> str:
    """
    Module-level shortcut that delegates to ``PromptRegistry.get_prompt``.

    Args:
        function_name: AI function name to resolve a prompt for
        account: Account object forwarded to the registry (optional)
        task: Task object forwarded to the registry (optional)
        context: Rendering context forwarded to the registry (optional)

    Returns:
        Whatever ``PromptRegistry.get_prompt`` returns for these arguments
    """
    resolve = PromptRegistry.get_prompt
    return resolve(
        function_name,
        account=account,
        task=task,
        context=context,
    )