Implement Stage 3: Enhance content metadata and validation features

- Added entity metadata fields to the Tasks model, including entity_type, taxonomy, and cluster_role.
- Updated CandidateEngine to prioritize content relevance based on cluster mappings.
- Introduced metadata completeness scoring in ContentAnalyzer.
- Enhanced validation services to check for entity type and mapping completeness.
- Updated frontend components to display and validate new metadata fields.
- Implemented API endpoints for content validation and metadata persistence.
- Migrated existing data to populate new metadata fields for Tasks and Content.
This commit is contained in:
IGNY8 VPS (Salman)
2025-11-19 19:21:30 +00:00
parent 38f6026e73
commit bae9ea47d8
33 changed files with 2388 additions and 73 deletions

View File

@@ -4,17 +4,121 @@ import django.db.models.deletion
def _resolve_scope_ids(content, task):
    """Return ``(account_id, site_id, sector_id)`` for a new mapping row.

    Values on ``content`` win; ``task`` is the fallback. For the account,
    both ``account_id`` and ``tenant_id`` attribute names are tried on each
    object. Missing attributes are treated as ``None``.
    """
    account_id = (
        getattr(content, 'account_id', None)
        or getattr(content, 'tenant_id', None)
        or getattr(task, 'account_id', None)
        or getattr(task, 'tenant_id', None)
    )
    site_id = getattr(content, 'site_id', None) or getattr(task, 'site_id', None)
    sector_id = getattr(content, 'sector_id', None) or getattr(task, 'sector_id', None)
    return account_id, site_id, sector_id


def backfill_metadata_mappings_stub(apps, schema_editor):
    """
    Stage 3: Backfill metadata mappings for existing Content/Task records.

    This function backfills:
    - entity_type on Tasks from content_type (only if Tasks has entity_type)
    - entity_type on Content from its task (only if both models have the field)
    - ContentClusterMap records from existing Task -> Cluster relationships
    - ContentTaxonomyMap records from existing Task -> Taxonomy relationships
      (only if Tasks has a taxonomy field)

    ContentAttributeMap records are NOT backfilled here.

    Mapping rows are only created when no row for the same content/cluster
    (or content/taxonomy) pair exists and when account, site and sector ids
    can all be resolved, so re-running the function does not duplicate rows.

    Args:
        apps: registry exposing ``get_model(app_label, model_name)``.
        schema_editor: unused.
    """
    Tasks = apps.get_model('writer', 'Tasks')
    Content = apps.get_model('writer', 'Content')
    ContentClusterMap = apps.get_model('writer', 'ContentClusterMap')
    ContentTaxonomyMap = apps.get_model('writer', 'ContentTaxonomyMap')

    # entity_type / taxonomy / cluster_role are added by a later migration
    # (0013), so each one is feature-detected before it is touched.
    task_fields = {f.name for f in Tasks._meta.get_fields()}
    content_fields = {f.name for f in Content._meta.get_fields()}
    has_entity_type = 'entity_type' in task_fields
    has_cluster_role = 'cluster_role' in task_fields
    has_taxonomy = 'taxonomy' in task_fields

    # content_type -> entity_type; anything unmapped falls back to 'blog_post'.
    entity_type_map = {
        'blog_post': 'blog_post',
        'article': 'article',
        'guide': 'article',
        'tutorial': 'article',
    }

    # Backfill Tasks: set entity_type from content_type where it is still NULL.
    tasks_updated = 0
    if has_entity_type:
        for task in Tasks.objects.filter(entity_type__isnull=True):
            # getattr: tolerate a Tasks model without a content_type attribute.
            content_type = getattr(task, 'content_type', None)
            task.entity_type = entity_type_map.get(content_type, 'blog_post')
            task.save(update_fields=['entity_type'])
            tasks_updated += 1

    # Backfill Content: inherit entity_type from the task where still NULL.
    # Skipped entirely when Tasks has no entity_type, since nothing could be
    # copied and each content.task access may cost a query.
    content_updated = 0
    if has_entity_type and 'entity_type' in content_fields:
        for content in Content.objects.filter(entity_type__isnull=True):
            task = content.task
            inherited = getattr(task, 'entity_type', None) if task else None
            if not inherited:
                continue
            content.entity_type = inherited
            content.save(update_fields=['entity_type'])
            content_updated += 1

    # Backfill ContentClusterMap from Task -> Cluster relationships.
    cluster_maps_created = 0
    for task in Tasks.objects.filter(cluster__isnull=False):
        role = 'hub'  # Default
        if has_cluster_role and getattr(task, 'cluster_role', None):
            role = task.cluster_role
        for content in Content.objects.filter(task=task):
            already_mapped = ContentClusterMap.objects.filter(
                content=content,
                cluster=task.cluster,
            ).exists()
            if already_mapped:
                continue
            account_id, site_id, sector_id = _resolve_scope_ids(content, task)
            if not (account_id and site_id and sector_id):
                continue
            ContentClusterMap.objects.create(
                content=content,
                task=task,
                cluster=task.cluster,
                role=role,
                account_id=account_id,
                site_id=site_id,
                sector_id=sector_id,
                source='blueprint' if task.idea else 'manual',
                metadata={},
            )
            cluster_maps_created += 1

    # Backfill ContentTaxonomyMap from Task -> Taxonomy relationships.
    taxonomy_maps_created = 0
    if has_taxonomy:
        for task in Tasks.objects.filter(taxonomy__isnull=False):
            for content in Content.objects.filter(task=task):
                already_mapped = ContentTaxonomyMap.objects.filter(
                    content=content,
                    taxonomy=task.taxonomy,
                ).exists()
                if already_mapped:
                    continue
                account_id, site_id, sector_id = _resolve_scope_ids(content, task)
                if not (account_id and site_id and sector_id):
                    continue
                ContentTaxonomyMap.objects.create(
                    content=content,
                    task=task,
                    taxonomy=task.taxonomy,
                    account_id=account_id,
                    site_id=site_id,
                    sector_id=sector_id,
                    source='blueprint',
                    metadata={},
                )
                taxonomy_maps_created += 1

    print("Backfill complete:")
    print(f" - Tasks entity_type updated: {tasks_updated}")
    print(f" - Content entity_type updated: {content_updated}")
    print(f" - Cluster mappings created: {cluster_maps_created}")
    print(f" - Taxonomy mappings created: {taxonomy_maps_created}")
def reverse_backfill_metadata_mappings_stub(apps, schema_editor):

View File

@@ -0,0 +1,70 @@
from django.db import migrations, models
import django.db.models.deletion
# Allowed values for Tasks.entity_type.
_ENTITY_TYPE_CHOICES = [
    ('blog_post', 'Blog Post'),
    ('article', 'Article'),
    ('product', 'Product'),
    ('service', 'Service Page'),
    ('taxonomy', 'Taxonomy Page'),
    ('page', 'Page'),
]

# Allowed values for Tasks.cluster_role.
_CLUSTER_ROLE_CHOICES = [
    ('hub', 'Hub Page'),
    ('supporting', 'Supporting Page'),
    ('attribute', 'Attribute Page'),
]


class Migration(migrations.Migration):
    """Add entity_type, taxonomy and cluster_role to Tasks, plus two indexes."""

    dependencies = [
        ('writer', '0012_metadata_mapping_tables'),
        ('site_building', '0003_workflow_and_taxonomies'),
    ]

    operations = [
        # --- New columns on Tasks -------------------------------------
        migrations.AddField(
            model_name='tasks',
            name='entity_type',
            field=models.CharField(
                max_length=50,
                choices=_ENTITY_TYPE_CHOICES,
                default='blog_post',
                null=True,
                blank=True,
                # NOTE(review): db_index=True here plus the explicit
                # AddIndex on entity_type below looks redundant — confirm
                # against the model before changing either one.
                db_index=True,
                help_text='Type of content entity (inherited from idea/blueprint)',
            ),
        ),
        migrations.AddField(
            model_name='tasks',
            name='taxonomy',
            field=models.ForeignKey(
                to='site_building.SiteBlueprintTaxonomy',
                on_delete=models.SET_NULL,
                related_name='tasks',
                null=True,
                blank=True,
                help_text='Taxonomy association when derived from blueprint planning',
            ),
        ),
        migrations.AddField(
            model_name='tasks',
            name='cluster_role',
            field=models.CharField(
                max_length=50,
                choices=_CLUSTER_ROLE_CHOICES,
                default='hub',
                null=True,
                blank=True,
                help_text='Role within the cluster-driven sitemap',
            ),
        ),
        # --- Named single-column indexes ------------------------------
        migrations.AddIndex(
            model_name='tasks',
            index=models.Index(
                fields=['entity_type'],
                name='writer_tasks_entity_type_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='tasks',
            index=models.Index(
                fields=['cluster_role'],
                name='writer_tasks_cluster_role_idx',
            ),
        ),
    ]