Implement Stage 3: Enhance content metadata and validation features

- Added entity metadata fields to the Tasks model, including entity_type, taxonomy, and cluster_role.
- Updated CandidateEngine to prioritize content relevance based on cluster mappings.
- Introduced metadata completeness scoring in ContentAnalyzer.
- Enhanced validation services to check for entity type and mapping completeness.
- Updated frontend components to display and validate new metadata fields.
- Implemented API endpoints for content validation and metadata persistence.
- Migrated existing data to populate new metadata fields for Tasks and Content.
This commit is contained in:
IGNY8 VPS (Salman)
2025-11-19 19:21:30 +00:00
parent 38f6026e73
commit bae9ea47d8
33 changed files with 2388 additions and 73 deletions

View File

@@ -0,0 +1,2 @@
# Writer management commands

View File

@@ -0,0 +1,2 @@
# Writer management commands

View File

@@ -0,0 +1,114 @@
"""
Management command to audit site metadata gaps
Stage 3: Summarizes metadata completeness per site
Usage: python manage.py audit_site_metadata [--site {id}] [--detailed]
"""
from django.core.management.base import BaseCommand
from django.db.models import Count, Q
from igny8_core.auth.models import Site
from igny8_core.business.content.models import (
Tasks,
Content,
ContentClusterMap,
ContentTaxonomyMap,
ContentAttributeMap,
)
class Command(BaseCommand):
    """Print a metadata completeness report for one site or for every site.

    For each selected site the command counts how many ``Tasks`` rows have a
    cluster, entity type, taxonomy and cluster role, how many ``Content`` rows
    have an entity type and at least one cluster / taxonomy / attribute
    mapping row, and then lists the resulting gaps. With ``--detailed`` it
    also prints task counts grouped by entity type and by cluster role.
    """

    help = 'Audit metadata completeness for a site (Stage 3)'

    def add_arguments(self, parser):
        """Register the optional ``--site`` and ``--detailed`` options."""
        parser.add_argument(
            '--site',
            type=int,
            help='Site ID to audit (if not provided, audits all sites)',
        )
        parser.add_argument(
            '--detailed',
            action='store_true',
            help='Show detailed breakdown by entity type',
        )

    def handle(self, *args, **options):
        """Select the sites to audit and write one report per site.

        Reads ``options['site']`` (an int or ``None``) and
        ``options['detailed']`` (a bool). If the selection is empty an error
        line is written to stdout and the command returns without auditing.
        """
        site_id = options.get('site')
        detailed = options.get('detailed', False)

        # Compare against None rather than truthiness so that an explicit
        # ``--site 0`` is treated as a (non-matching) ID instead of silently
        # falling back to auditing every site.
        if site_id is not None:
            sites = Site.objects.filter(id=site_id)
        else:
            sites = Site.objects.all()

        if not sites.exists():
            # Without --site there is no ID to report; avoid printing
            # "Site None not found".
            if site_id is not None:
                message = f'Site {site_id} not found'
            else:
                message = 'No sites found'
            self.stdout.write(self.style.ERROR(message))
            return

        for site in sites:
            self._audit_site(site, detailed)

    @staticmethod
    def _coverage(count, total):
        """Return ``'count/total (N%)'`` where N is the floored percentage.

        N is 0 when ``total`` is 0, which avoids a division by zero.
        """
        percent = count * 100 // total if total else 0
        return f'{count}/{total} ({percent}%)'

    @staticmethod
    def _mapped_content_count(map_model, site):
        """Count distinct content items of ``site`` referenced by ``map_model`` rows."""
        return (
            map_model.objects.filter(content__site=site)
            .values('content')
            .distinct()
            .count()
        )

    def _write_breakdown(self, tasks, field):
        """Write the number of tasks per value of ``field``, largest group first."""
        rows = tasks.values(field).annotate(count=Count('id')).order_by('-count')
        for row in rows:
            # Falsy values (None or an empty string) are shown as "NULL".
            self.stdout.write(f' {row[field] or "NULL"}: {row["count"]} tasks')

    def _audit_site(self, site, detailed):
        """Write the tasks summary, content summary and gap list for ``site``."""
        write = self.stdout.write
        coverage = self._coverage
        banner = '=' * 80

        write(self.style.SUCCESS(f'\n{banner}'))
        write(self.style.SUCCESS(f'Auditing Site: {site.name} (ID: {site.id})'))
        write(self.style.SUCCESS(f'{banner}\n'))

        # Tasks audit
        tasks = Tasks.objects.filter(site=site)
        total_tasks = tasks.count()
        tasks_with_cluster = tasks.filter(cluster__isnull=False).count()
        tasks_with_entity_type = tasks.filter(entity_type__isnull=False).count()
        tasks_with_taxonomy = tasks.filter(taxonomy__isnull=False).count()
        tasks_with_cluster_role = tasks.filter(cluster_role__isnull=False).count()

        write('\n📋 Tasks Summary:')
        write(f' Total Tasks: {total_tasks}')
        write(f' With Cluster: {coverage(tasks_with_cluster, total_tasks)}')
        write(f' With Entity Type: {coverage(tasks_with_entity_type, total_tasks)}')
        write(f' With Taxonomy: {coverage(tasks_with_taxonomy, total_tasks)}')
        write(f' With Cluster Role: {coverage(tasks_with_cluster_role, total_tasks)}')

        # Content audit
        content = Content.objects.filter(site=site)
        total_content = content.count()
        content_with_entity_type = content.filter(entity_type__isnull=False).count()
        content_with_cluster_map = self._mapped_content_count(ContentClusterMap, site)
        content_with_taxonomy_map = self._mapped_content_count(ContentTaxonomyMap, site)
        content_with_attributes = self._mapped_content_count(ContentAttributeMap, site)

        write('\n📄 Content Summary:')
        write(f' Total Content: {total_content}')
        write(f' With Entity Type: {coverage(content_with_entity_type, total_content)}')
        write(f' With Cluster Mapping: {coverage(content_with_cluster_map, total_content)}')
        write(f' With Taxonomy Mapping: {coverage(content_with_taxonomy_map, total_content)}')
        write(f' With Attributes: {coverage(content_with_attributes, total_content)}')

        # Gap analysis
        tasks_missing_cluster = tasks.filter(cluster__isnull=True).count()
        tasks_missing_entity_type = tasks.filter(entity_type__isnull=True).count()
        content_missing_cluster_map = total_content - content_with_cluster_map

        write('\n⚠️ Gaps:')
        write(f' Tasks missing cluster: {tasks_missing_cluster}')
        write(f' Tasks missing entity_type: {tasks_missing_entity_type}')
        write(f' Content missing cluster mapping: {content_missing_cluster_map}')

        if detailed:
            write('\n📊 Entity Type Breakdown:')
            self._write_breakdown(tasks, 'entity_type')

            write('\n🎯 Cluster Role Breakdown:')
            self._write_breakdown(tasks, 'cluster_role')

        # Blank line separating this site's report from the next one.
        write('')