Implement Stage 3: Enhance content metadata and validation features
- Added entity metadata fields to the Tasks model, including entity_type, taxonomy, and cluster_role.
- Updated CandidateEngine to prioritize content relevance based on cluster mappings.
- Introduced metadata completeness scoring in ContentAnalyzer.
- Enhanced validation services to check for entity type and mapping completeness.
- Updated frontend components to display and validate new metadata fields.
- Implemented API endpoints for content validation and metadata persistence.
- Migrated existing data to populate new metadata fields for Tasks and Content.
This commit is contained in:
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
Metadata Mapping Service
|
||||
Stage 3: Persists cluster/taxonomy/attribute mappings from Tasks to Content
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
from django.db import transaction
|
||||
|
||||
from igny8_core.business.content.models import (
|
||||
Tasks,
|
||||
Content,
|
||||
ContentClusterMap,
|
||||
ContentTaxonomyMap,
|
||||
ContentAttributeMap,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MetadataMappingService:
    """Service for persisting metadata mappings from Tasks to Content.

    Both public methods run inside ``transaction.atomic`` so the mapping rows
    and the ``entity_type`` update are written together or not at all.
    """

    @transaction.atomic
    def persist_task_metadata_to_content(self, content: Content) -> None:
        """
        Persist cluster/taxonomy mappings and entity_type from Task to Content.

        Mapping rows are written with ``get_or_create``, so calling this
        repeatedly for the same content does not duplicate rows. A log line is
        emitted only when a row is actually created.

        Args:
            content: Content instance with an associated task. When
                ``content.task`` is falsy a warning is logged and nothing is
                written.
        """
        task = content.task
        if not task:
            logger.warning(
                "Content %s has no associated task, skipping metadata mapping",
                content.id,
            )
            return

        # Stage 3: Persist cluster mapping if task has cluster
        if task.cluster:
            _, created = ContentClusterMap.objects.get_or_create(
                content=content,
                cluster=task.cluster,
                # A task without an explicit role is mapped as 'hub'.
                role=task.cluster_role or 'hub',
                defaults={
                    'account': content.account,
                    'site': content.site,
                    'sector': content.sector,
                    'source': 'blueprint' if task.idea else 'manual',
                    'metadata': {},
                },
            )
            # Only claim creation when a row was really inserted.
            if created:
                logger.info(
                    "Created cluster mapping for content %s -> cluster %s",
                    content.id,
                    task.cluster.id,
                )

        # Stage 3: Persist taxonomy mapping if task has taxonomy
        if task.taxonomy:
            _, created = ContentTaxonomyMap.objects.get_or_create(
                content=content,
                taxonomy=task.taxonomy,
                defaults={
                    'account': content.account,
                    'site': content.site,
                    'sector': content.sector,
                    # NOTE(review): always 'blueprint' here, while the cluster
                    # mapping above falls back to 'manual' when task.idea is
                    # unset - confirm this difference is intended.
                    'source': 'blueprint',
                    'metadata': {},
                },
            )
            if created:
                logger.info(
                    "Created taxonomy mapping for content %s -> taxonomy %s",
                    content.id,
                    task.taxonomy.id,
                )

        # Stage 3: Inherit entity_type from task, never overwriting a value
        # that is already set on the content.
        if task.entity_type and not content.entity_type:
            content.entity_type = task.entity_type
            content.save(update_fields=['entity_type'])
            logger.info(
                "Set entity_type %s for content %s",
                task.entity_type,
                content.id,
            )

        # Stage 3: Extract attributes from task metadata if available
        # This can be extended to parse task.description or task.metadata for attributes
        # For now, we'll rely on explicit attribute data in future enhancements

    @transaction.atomic
    def backfill_content_metadata(self, content: Content) -> None:
        """
        Backfill metadata mappings for existing content that may be missing mappings.

        Resolution order:
          1. If the content already has any cluster mapping, do nothing. Only
             cluster mappings are checked; taxonomy mappings and entity_type
             are not inspected by this guard.
          2. If the content has a task, delegate to
             ``persist_task_metadata_to_content``.
          3. Otherwise, if ``content.metadata`` carries a ``cluster_id``, map
             the content to that cluster with role 'hub' and source 'manual'.

        An unknown or malformed ``cluster_id`` is logged as a warning and
        skipped rather than raised.

        Args:
            content: Content instance to backfill
        """
        # If content already has mappings, skip
        if ContentClusterMap.objects.filter(content=content).exists():
            return

        # Try to infer from task
        if content.task:
            self.persist_task_metadata_to_content(content)
            return

        # Try to infer from content metadata
        if not content.metadata:
            return
        cluster_id = content.metadata.get('cluster_id')
        if not cluster_id:
            return

        # Imported locally, as in the original code (presumably to avoid a
        # circular import between the content and planning apps).
        from igny8_core.business.planning.models import Clusters

        try:
            cluster = Clusters.objects.get(id=cluster_id)
        except Clusters.DoesNotExist:
            logger.warning(
                "Cluster %s not found for content %s", cluster_id, content.id
            )
            return
        except (TypeError, ValueError):
            # cluster_id comes from free-form metadata; a value that cannot be
            # coerced to the primary-key type must not abort the backfill.
            logger.warning(
                "Invalid cluster_id %r in metadata for content %s",
                cluster_id,
                content.id,
            )
            return

        ContentClusterMap.objects.get_or_create(
            content=content,
            cluster=cluster,
            role='hub',  # Default
            defaults={
                'account': content.account,
                'site': content.site,
                'sector': content.sector,
                'source': 'manual',
                'metadata': {},
            },
        )
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Content Validation Service
|
||||
Stage 3: Validates content metadata before publish
|
||||
"""
|
||||
import logging
|
||||
from typing import List, Dict, Optional
|
||||
from django.core.exceptions import ValidationError
|
||||
|
||||
from igny8_core.business.content.models import Tasks, Content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ContentValidationService:
    """Service for validating content metadata requirements.

    Every validator returns a list of error dicts with the keys ``field``,
    ``code`` and ``message``. An empty list means the check passed.
    """

    @staticmethod
    def _error(field: str, code: str, message: str) -> Dict[str, str]:
        """Build one validation-error dict in the shape all validators return."""
        return {'field': field, 'code': code, 'message': message}

    @staticmethod
    def _missing_in_order(required, present) -> List[str]:
        """Return the names in *required* absent from *present*, keeping *required* order."""
        present_names = set(present)
        return [name for name in required if name not in present_names]

    def validate_task(self, task: Tasks) -> List[Dict[str, str]]:
        """
        Validate a task has required metadata.

        Args:
            task: Task instance to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Stage 3: Enforce "no cluster, no task" rule when feature flag enabled
        from django.conf import settings
        if getattr(settings, 'USE_SITE_BUILDER_REFACTOR', False) and not task.cluster:
            errors.append(self._error(
                'cluster',
                'missing_cluster',
                'Task must be associated with a cluster before content generation',
            ))

        # Stage 3: Validate entity_type is set
        if not task.entity_type:
            errors.append(self._error(
                'entity_type',
                'missing_entity_type',
                'Task must have an entity type specified',
            ))

        # Stage 3: Validate taxonomy for product/service entities
        if task.entity_type in ['product', 'service'] and not task.taxonomy:
            errors.append(self._error(
                'taxonomy',
                'missing_taxonomy',
                f'{task.entity_type.title()} tasks require a taxonomy association',
            ))

        return errors

    def validate_content(self, content: Content) -> List[Dict[str, str]]:
        """
        Validate content has required metadata before publish.

        Checks, in order: entity_type is set; content whose source is 'igny8'
        has at least one cluster mapping; product/service content has at least
        one taxonomy mapping; product content has the attributes 'price',
        'sku' and 'category'.

        Args:
            content: Content instance to validate

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Stage 3: Validate entity_type
        if not content.entity_type:
            errors.append(self._error(
                'entity_type',
                'missing_entity_type',
                'Content must have an entity type specified',
            ))

        # Stage 3: Validate cluster mapping exists for IGNY8 content
        if content.source == 'igny8':
            from igny8_core.business.content.models import ContentClusterMap
            if not ContentClusterMap.objects.filter(content=content).exists():
                errors.append(self._error(
                    'cluster_mapping',
                    'missing_cluster_mapping',
                    'Content must be mapped to at least one cluster',
                ))

        # Stage 3: Validate taxonomy for product/service content
        if content.entity_type in ['product', 'service']:
            from igny8_core.business.content.models import ContentTaxonomyMap
            if not ContentTaxonomyMap.objects.filter(content=content).exists():
                errors.append(self._error(
                    'taxonomy_mapping',
                    'missing_taxonomy_mapping',
                    f'{content.entity_type.title()} content requires a taxonomy mapping',
                ))

        # Stage 3: Validate required attributes for products
        if content.entity_type == 'product':
            from igny8_core.business.content.models import ContentAttributeMap
            required_attrs = ['price', 'sku', 'category']
            existing_attrs = ContentAttributeMap.objects.filter(
                content=content,
                name__in=required_attrs
            ).values_list('name', flat=True)
            # Keep the declared order so the message is stable between runs
            # (joining a set would list the names in arbitrary order).
            missing_attrs = self._missing_in_order(required_attrs, existing_attrs)
            if missing_attrs:
                errors.append(self._error(
                    'attributes',
                    'missing_attributes',
                    f'Product content requires attributes: {", ".join(missing_attrs)}',
                ))

        return errors

    def validate_for_publish(self, content: Content) -> List[Dict[str, str]]:
        """
        Comprehensive validation before publishing content.

        Runs ``validate_content`` and then additionally requires a title and
        at least 100 characters of ``html_content``. The length is measured on
        the raw stripped string, so markup characters count towards it.

        Args:
            content: Content instance to validate

        Returns:
            List of validation errors (empty if ready to publish)
        """
        # Basic content validation
        errors = list(self.validate_content(content))

        # Additional publish requirements
        if not content.title:
            errors.append(self._error(
                'title',
                'missing_title',
                'Content must have a title before publishing',
            ))

        if not content.html_content or len(content.html_content.strip()) < 100:
            errors.append(self._error(
                'html_content',
                'insufficient_content',
                'Content must have at least 100 characters before publishing',
            ))

        return errors

    def ensure_required_attributes(self, task: Tasks) -> List[Dict[str, str]]:
        """
        Check if task has required attributes based on entity type.

        Currently only product tasks are checked, and only for a taxonomy.

        Args:
            task: Task instance to check

        Returns:
            List of missing attribute errors
        """
        errors = []

        # Products must have a taxonomy (cluster is enforced by validate_task
        # behind its feature flag, not here).
        if task.entity_type == 'product' and not task.taxonomy:
            errors.append(self._error(
                'taxonomy',
                'missing_taxonomy',
                'Product tasks require a taxonomy (product category)',
            ))

        return errors
|
||||
|
||||
Reference in New Issue
Block a user