Implement Stage 3: Enhance content metadata and validation features
- Added entity metadata fields to the Tasks model: entity_type, taxonomy, and cluster_role.
- Updated CandidateEngine to prioritize content relevance based on cluster mappings.
- Introduced metadata completeness scoring in ContentAnalyzer.
- Enhanced validation services to check entity type and mapping completeness.
- Updated frontend components to display and validate the new metadata fields.
- Implemented API endpoints for content validation and metadata persistence.
- Migrated existing data to populate the new metadata fields for Tasks and Content.
This commit is contained in:
@@ -4,17 +4,121 @@ import django.db.models.deletion
|
||||
|
||||
def _resolve_scope_ids(content, task):
    """Return ``(account_id, site_id, sector_id)`` for a new mapping row.

    Values on ``content`` win; ``task`` is the fallback. ``tenant_id`` is
    accepted as an alternative name for ``account_id`` on either object.
    Any element may be ``None`` when neither object carries a value.
    """
    account_id = (
        getattr(content, 'account_id', None)
        or getattr(content, 'tenant_id', None)
        or getattr(task, 'account_id', None)
        or getattr(task, 'tenant_id', None)
    )
    site_id = getattr(content, 'site_id', None) or getattr(task, 'site_id', None)
    sector_id = getattr(content, 'sector_id', None) or getattr(task, 'sector_id', None)
    return account_id, site_id, sector_id


def backfill_metadata_mappings_stub(apps, schema_editor):
    """
    Stage 3: Backfill metadata mappings for existing Content/Task records.

    This function backfills:
    - entity_type on Tasks from the existing content_type (only when the
      entity_type field exists on the historical model)
    - entity_type on Content, copied from the related task
    - ContentClusterMap records from existing Content/Task -> Cluster relationships
    - ContentTaxonomyMap records from existing Task -> Taxonomy relationships

    ContentAttributeMap records are not created here.

    Mapping rows are only created when no row for the same content/cluster
    (or content/taxonomy) pair exists yet, and only when an account, site and
    sector id can all be resolved; other rows are skipped silently.

    Args:
        apps: Migration app registry used to look up historical models.
        schema_editor: Unused; part of the RunPython callable signature.

    Prints a summary of how many rows were updated/created.
    """
    Tasks = apps.get_model('writer', 'Tasks')
    Content = apps.get_model('writer', 'Content')
    ContentClusterMap = apps.get_model('writer', 'ContentClusterMap')
    ContentTaxonomyMap = apps.get_model('writer', 'ContentTaxonomyMap')

    # The Tasks metadata fields are added in a later migration (0013), so
    # probe the historical model state instead of assuming they exist.
    task_fields = {f.name for f in Tasks._meta.get_fields()}
    content_fields = {f.name for f in Content._meta.get_fields()}
    has_entity_type = 'entity_type' in task_fields
    has_cluster_role = 'cluster_role' in task_fields
    has_taxonomy = 'taxonomy' in task_fields

    # content_type -> entity_type; anything unmapped falls back to 'blog_post'.
    entity_type_map = {
        'blog_post': 'blog_post',
        'article': 'article',
        'guide': 'article',
        'tutorial': 'article',
    }

    # Backfill Tasks: set entity_type from content_type where it is not set.
    tasks_updated = 0
    if has_entity_type:
        for task in Tasks.objects.filter(entity_type__isnull=True):
            # getattr: a task without a content_type simply gets the default.
            content_type = getattr(task, 'content_type', None)
            task.entity_type = entity_type_map.get(content_type, 'blog_post')
            task.save(update_fields=['entity_type'])
            tasks_updated += 1

    # Backfill Content: copy entity_type from the related task where not set.
    content_updated = 0
    if 'entity_type' in content_fields:
        for content in Content.objects.filter(entity_type__isnull=True):
            task = content.task
            if not (task and has_entity_type):
                continue
            task_entity_type = getattr(task, 'entity_type', None)
            if task_entity_type:
                content.entity_type = task_entity_type
                content.save(update_fields=['entity_type'])
                content_updated += 1

    # Backfill ContentClusterMap from Task -> Cluster relationships.
    cluster_maps_created = 0
    for task in Tasks.objects.filter(cluster__isnull=False):
        for content in Content.objects.filter(task=task):
            if ContentClusterMap.objects.filter(
                content=content,
                cluster=task.cluster,
            ).exists():
                continue

            role = 'hub'  # Default when the task carries no cluster_role
            if has_cluster_role and getattr(task, 'cluster_role', None):
                role = task.cluster_role

            account_id, site_id, sector_id = _resolve_scope_ids(content, task)
            if account_id and site_id and sector_id:
                ContentClusterMap.objects.create(
                    content=content,
                    task=task,
                    cluster=task.cluster,
                    role=role,
                    account_id=account_id,
                    site_id=site_id,
                    sector_id=sector_id,
                    source='blueprint' if task.idea else 'manual',
                    metadata={},
                )
                cluster_maps_created += 1

    # Backfill ContentTaxonomyMap from Task -> Taxonomy relationships.
    taxonomy_maps_created = 0
    if has_taxonomy:
        for task in Tasks.objects.filter(taxonomy__isnull=False):
            for content in Content.objects.filter(task=task):
                if ContentTaxonomyMap.objects.filter(
                    content=content,
                    taxonomy=task.taxonomy,
                ).exists():
                    continue

                account_id, site_id, sector_id = _resolve_scope_ids(content, task)
                if account_id and site_id and sector_id:
                    ContentTaxonomyMap.objects.create(
                        content=content,
                        task=task,
                        taxonomy=task.taxonomy,
                        account_id=account_id,
                        site_id=site_id,
                        sector_id=sector_id,
                        source='blueprint',
                        metadata={},
                    )
                    taxonomy_maps_created += 1

    print("Backfill complete:")
    print(f" - Tasks entity_type updated: {tasks_updated}")
    print(f" - Content entity_type updated: {content_updated}")
    print(f" - Cluster mappings created: {cluster_maps_created}")
    print(f" - Taxonomy mappings created: {taxonomy_maps_created}")
|
||||
|
||||
|
||||
def reverse_backfill_metadata_mappings_stub(apps, schema_editor):
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add entity metadata fields (entity_type, taxonomy, cluster_role) to Tasks."""

    dependencies = [
        ('writer', '0012_metadata_mapping_tables'),
        ('site_building', '0003_workflow_and_taxonomies'),
    ]

    operations = [
        # Tasks.entity_type: nullable choice field, defaults to 'blog_post'.
        # NOTE(review): this column is declared with db_index=True and also
        # receives an explicit AddIndex below - confirm both are intended.
        migrations.AddField(
            model_name='tasks',
            name='entity_type',
            field=models.CharField(
                max_length=50,
                choices=[
                    ('blog_post', 'Blog Post'),
                    ('article', 'Article'),
                    ('product', 'Product'),
                    ('service', 'Service Page'),
                    ('taxonomy', 'Taxonomy Page'),
                    ('page', 'Page'),
                ],
                default='blog_post',
                blank=True,
                null=True,
                db_index=True,
                help_text='Type of content entity (inherited from idea/blueprint)',
            ),
        ),
        # Tasks.taxonomy: optional link to a blueprint taxonomy; the link is
        # nulled (not cascaded) when the taxonomy row is deleted.
        migrations.AddField(
            model_name='tasks',
            name='taxonomy',
            field=models.ForeignKey(
                to='site_building.SiteBlueprintTaxonomy',
                on_delete=django.db.models.deletion.SET_NULL,
                related_name='tasks',
                blank=True,
                null=True,
                help_text='Taxonomy association when derived from blueprint planning',
            ),
        ),
        # Tasks.cluster_role: nullable choice field, defaults to 'hub'.
        migrations.AddField(
            model_name='tasks',
            name='cluster_role',
            field=models.CharField(
                max_length=50,
                choices=[
                    ('hub', 'Hub Page'),
                    ('supporting', 'Supporting Page'),
                    ('attribute', 'Attribute Page'),
                ],
                default='hub',
                blank=True,
                null=True,
                help_text='Role within the cluster-driven sitemap',
            ),
        ),
        # Named single-column indexes on the two new choice fields.
        migrations.AddIndex(
            model_name='tasks',
            index=models.Index(
                fields=['entity_type'],
                name='writer_tasks_entity_type_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='tasks',
            index=models.Index(
                fields=['cluster_role'],
                name='writer_tasks_cluster_role_idx',
            ),
        ),
    ]
|
||||
|
||||
Reference in New Issue
Block a user