Implement Stage 3: Enhance content metadata and validation features

- Added entity metadata fields to the Tasks model, including entity_type, taxonomy, and cluster_role.
- Updated CandidateEngine to prioritize content relevance based on cluster mappings.
- Introduced metadata completeness scoring in ContentAnalyzer.
- Enhanced validation services to check for entity type and mapping completeness.
- Updated frontend components to display and validate new metadata fields.
- Implemented API endpoints for content validation and metadata persistence.
- Migrated existing data to populate new metadata fields for Tasks and Content.
This commit is contained in:
IGNY8 VPS (Salman)
2025-11-19 19:21:30 +00:00
parent 38f6026e73
commit bae9ea47d8
33 changed files with 2388 additions and 73 deletions

View File

@@ -53,6 +53,46 @@ class Tasks(SiteSectorBaseModel):
content_type = models.CharField(max_length=50, choices=CONTENT_TYPE_CHOICES, default='blog_post')
status = models.CharField(max_length=50, choices=STATUS_CHOICES, default='queued')
# Stage 3: Entity metadata fields
ENTITY_TYPE_CHOICES = [
('blog_post', 'Blog Post'),
('article', 'Article'),
('product', 'Product'),
('service', 'Service Page'),
('taxonomy', 'Taxonomy Page'),
('page', 'Page'),
]
CLUSTER_ROLE_CHOICES = [
('hub', 'Hub Page'),
('supporting', 'Supporting Page'),
('attribute', 'Attribute Page'),
]
entity_type = models.CharField(
max_length=50,
choices=ENTITY_TYPE_CHOICES,
default='blog_post',
db_index=True,
blank=True,
null=True,
help_text="Type of content entity (inherited from idea/blueprint)"
)
taxonomy = models.ForeignKey(
'site_building.SiteBlueprintTaxonomy',
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name='tasks',
help_text="Taxonomy association when derived from blueprint planning"
)
cluster_role = models.CharField(
max_length=50,
choices=CLUSTER_ROLE_CHOICES,
default='hub',
blank=True,
null=True,
help_text="Role within the cluster-driven sitemap"
)
# Content fields
content = models.TextField(blank=True, null=True) # Generated content
word_count = models.IntegerField(default=0)
@@ -78,6 +118,8 @@ class Tasks(SiteSectorBaseModel):
models.Index(fields=['status']),
models.Index(fields=['cluster']),
models.Index(fields=['content_type']),
models.Index(fields=['entity_type']),
models.Index(fields=['cluster_role']),
models.Index(fields=['site', 'sector']),
]

View File

@@ -0,0 +1,116 @@
"""
Metadata Mapping Service
Stage 3: Persists cluster/taxonomy/attribute mappings from Tasks to Content
"""
import logging
from typing import Optional
from django.db import transaction
from igny8_core.business.content.models import (
Tasks,
Content,
ContentClusterMap,
ContentTaxonomyMap,
ContentAttributeMap,
)
logger = logging.getLogger(__name__)
class MetadataMappingService:
    """Service for persisting metadata mappings from Tasks to Content.

    Copies the cluster, taxonomy and entity-type information carried by a
    ``Tasks`` row onto the ``Content`` generated from it, creating the
    ``ContentClusterMap`` / ``ContentTaxonomyMap`` rows when they do not
    exist yet.
    """

    @staticmethod
    def _mapping_defaults(content: Content, source: str) -> dict:
        """Build the ``defaults`` dict shared by every mapping row created here."""
        return {
            'account': content.account,
            'site': content.site,
            'sector': content.sector,
            'source': source,
            'metadata': {},
        }

    @transaction.atomic
    def persist_task_metadata_to_content(self, content: Content) -> None:
        """
        Persist cluster/taxonomy/attribute mappings from Task to Content.

        Args:
            content: Content instance with an associated task. When the
                content has no task, a warning is logged and nothing is
                written.
        """
        task = content.task
        if not task:
            logger.warning(
                "Content %s has no associated task, skipping metadata mapping",
                content.id,
            )
            return

        # Stage 3: Persist cluster mapping if task has cluster.
        # The role is part of the lookup, so the same content/cluster pair
        # may hold one row per role.
        if task.cluster:
            _, created = ContentClusterMap.objects.get_or_create(
                content=content,
                cluster=task.cluster,
                role=task.cluster_role or 'hub',
                defaults=self._mapping_defaults(
                    content, 'blueprint' if task.idea else 'manual'
                ),
            )
            # Only report a creation when a row was actually inserted;
            # get_or_create also returns pre-existing rows.
            if created:
                logger.info(
                    "Created cluster mapping for content %s -> cluster %s",
                    content.id,
                    task.cluster.id,
                )

        # Stage 3: Persist taxonomy mapping if task has taxonomy
        if task.taxonomy:
            _, created = ContentTaxonomyMap.objects.get_or_create(
                content=content,
                taxonomy=task.taxonomy,
                defaults=self._mapping_defaults(content, 'blueprint'),
            )
            if created:
                logger.info(
                    "Created taxonomy mapping for content %s -> taxonomy %s",
                    content.id,
                    task.taxonomy.id,
                )

        # Stage 3: Inherit entity_type from task, never overwriting a value
        # the content already has.
        if task.entity_type and not content.entity_type:
            content.entity_type = task.entity_type
            content.save(update_fields=['entity_type'])
            logger.info(
                "Set entity_type %s for content %s",
                task.entity_type,
                content.id,
            )

        # Stage 3: Extract attributes from task metadata if available
        # This can be extended to parse task.description or task.metadata for attributes
        # For now, we'll rely on explicit attribute data in future enhancements

    @transaction.atomic
    def backfill_content_metadata(self, content: Content) -> None:
        """
        Backfill metadata mappings for existing content that may be missing mappings.

        Resolution order: skip when any cluster mapping already exists,
        otherwise copy from the associated task, otherwise fall back to a
        ``cluster_id`` key in ``content.metadata``.

        Args:
            content: Content instance to backfill
        """
        # If content already has a cluster mapping, skip
        if ContentClusterMap.objects.filter(content=content).exists():
            return

        # Try to infer from task
        if content.task:
            self.persist_task_metadata_to_content(content)
            return

        # Try to infer from content metadata
        metadata = content.metadata
        if not metadata:
            return
        cluster_id = metadata.get('cluster_id')
        if not cluster_id:
            return

        # Function-scope import kept from the original
        # (presumably avoids a circular import -- TODO confirm).
        from igny8_core.business.planning.models import Clusters

        # Keep the try body to the one call that can raise DoesNotExist.
        try:
            cluster = Clusters.objects.get(id=cluster_id)
        except Clusters.DoesNotExist:
            logger.warning(
                "Cluster %s not found for content %s", cluster_id, content.id
            )
            return

        ContentClusterMap.objects.get_or_create(
            content=content,
            cluster=cluster,
            role='hub',  # Default
            defaults=self._mapping_defaults(content, 'manual'),
        )

View File

@@ -0,0 +1,170 @@
"""
Content Validation Service
Stage 3: Validates content metadata before publish
"""
import logging
from typing import List, Dict, Optional
from django.core.exceptions import ValidationError
from igny8_core.business.content.models import Tasks, Content
logger = logging.getLogger(__name__)
class ContentValidationService:
    """Service for validating content metadata requirements.

    Every public method returns a list of error dicts with the keys
    ``field``, ``code`` and ``message``; an empty list means the object
    passed validation.
    """

    # Entity types that must carry a taxonomy association/mapping.
    TAXONOMY_REQUIRED_ENTITY_TYPES = ('product', 'service')
    # Attribute names every product content must have mapped.
    REQUIRED_PRODUCT_ATTRIBUTES = ('price', 'sku', 'category')
    # Minimum length of the stripped HTML body required to publish.
    MIN_PUBLISH_CONTENT_LENGTH = 100

    def validate_task(self, task: "Tasks") -> List[Dict[str, str]]:
        """
        Validate a task has required metadata.

        Args:
            task: Task instance to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Stage 3: Enforce "no cluster, no task" rule when feature flag enabled
        from django.conf import settings
        if getattr(settings, 'USE_SITE_BUILDER_REFACTOR', False) and not task.cluster:
            errors.append({
                'field': 'cluster',
                'code': 'missing_cluster',
                'message': 'Task must be associated with a cluster before content generation',
            })

        # Stage 3: Validate entity_type is set
        if not task.entity_type:
            errors.append({
                'field': 'entity_type',
                'code': 'missing_entity_type',
                'message': 'Task must have an entity type specified',
            })

        # Stage 3: Validate taxonomy for product/service entities
        if task.entity_type in self.TAXONOMY_REQUIRED_ENTITY_TYPES and not task.taxonomy:
            errors.append({
                'field': 'taxonomy',
                'code': 'missing_taxonomy',
                'message': f'{task.entity_type.title()} tasks require a taxonomy association',
            })

        return errors

    def validate_content(self, content: "Content") -> List[Dict[str, str]]:
        """
        Validate content has required metadata before publish.

        Args:
            content: Content instance to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Stage 3: Validate entity_type
        if not content.entity_type:
            errors.append({
                'field': 'entity_type',
                'code': 'missing_entity_type',
                'message': 'Content must have an entity type specified',
            })

        # Stage 3: Validate cluster mapping exists for IGNY8 content
        if content.source == 'igny8':
            from igny8_core.business.content.models import ContentClusterMap
            if not ContentClusterMap.objects.filter(content=content).exists():
                errors.append({
                    'field': 'cluster_mapping',
                    'code': 'missing_cluster_mapping',
                    'message': 'Content must be mapped to at least one cluster',
                })

        # Stage 3: Validate taxonomy for product/service content
        if content.entity_type in self.TAXONOMY_REQUIRED_ENTITY_TYPES:
            from igny8_core.business.content.models import ContentTaxonomyMap
            if not ContentTaxonomyMap.objects.filter(content=content).exists():
                errors.append({
                    'field': 'taxonomy_mapping',
                    'code': 'missing_taxonomy_mapping',
                    'message': f'{content.entity_type.title()} content requires a taxonomy mapping',
                })

        # Stage 3: Validate required attributes for products
        if content.entity_type == 'product':
            from igny8_core.business.content.models import ContentAttributeMap
            required_attrs = self.REQUIRED_PRODUCT_ATTRIBUTES
            existing_attrs = set(
                ContentAttributeMap.objects.filter(
                    content=content,
                    name__in=required_attrs,
                ).values_list('name', flat=True)
            )
            # Walk the declared order (not a set difference) so the
            # user-facing message lists attributes deterministically.
            missing_attrs = [
                attr for attr in required_attrs if attr not in existing_attrs
            ]
            if missing_attrs:
                errors.append({
                    'field': 'attributes',
                    'code': 'missing_attributes',
                    'message': f'Product content requires attributes: {", ".join(missing_attrs)}',
                })

        return errors

    def validate_for_publish(self, content: "Content") -> List[Dict[str, str]]:
        """
        Comprehensive validation before publishing content.

        Runs ``validate_content`` and then checks that a title and a
        sufficiently long HTML body are present.

        Args:
            content: Content instance to validate

        Returns:
            List of validation errors (empty if ready to publish)
        """
        # Basic content validation
        errors = list(self.validate_content(content))

        # Additional publish requirements
        if not content.title:
            errors.append({
                'field': 'title',
                'code': 'missing_title',
                'message': 'Content must have a title before publishing',
            })

        # Length is measured on the raw HTML string after stripping
        # surrounding whitespace (markup counts toward the total).
        body = content.html_content
        if not body or len(body.strip()) < self.MIN_PUBLISH_CONTENT_LENGTH:
            errors.append({
                'field': 'html_content',
                'code': 'insufficient_content',
                'message': (
                    f'Content must have at least {self.MIN_PUBLISH_CONTENT_LENGTH} '
                    'characters before publishing'
                ),
            })

        return errors

    def ensure_required_attributes(self, task: "Tasks") -> List[Dict[str, str]]:
        """
        Check if task has required attributes based on entity type.

        Args:
            task: Task instance to check

        Returns:
            List of missing attribute errors
        """
        errors = []

        # Only the taxonomy is checked for product tasks here; this method
        # does not check the task's cluster.
        if task.entity_type == 'product' and not task.taxonomy:
            errors.append({
                'field': 'taxonomy',
                'code': 'missing_taxonomy',
                'message': 'Product tasks require a taxonomy (product category)',
            })

        return errors