Implement Stage 3: Enhance content metadata and validation features
- Added entity metadata fields to the Tasks model, including entity_type, taxonomy, and cluster_role.
- Updated CandidateEngine to prioritize content relevance based on cluster mappings.
- Introduced metadata completeness scoring in ContentAnalyzer.
- Enhanced validation services to check for entity type and mapping completeness.
- Updated frontend components to display and validate new metadata fields.
- Implemented API endpoints for content validation and metadata persistence.
- Migrated existing data to populate new metadata fields for Tasks and Content.
This commit is contained in:
2
backend/igny8_core/modules/writer/management/__init__.py
Normal file
2
backend/igny8_core/modules/writer/management/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# Writer management commands
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
# Writer management commands
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
"""
|
||||
Management command to audit site metadata gaps
|
||||
Stage 3: Summarizes metadata completeness per site
|
||||
|
||||
Usage: python manage.py audit_site_metadata [--site {id}] [--detailed]
|
||||
"""
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models import Count, Q
|
||||
from igny8_core.auth.models import Site
|
||||
from igny8_core.business.content.models import (
|
||||
Tasks,
|
||||
Content,
|
||||
ContentClusterMap,
|
||||
ContentTaxonomyMap,
|
||||
ContentAttributeMap,
|
||||
)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Print a metadata-completeness report for one site or for every site.

    For each audited site the command writes three sections to stdout:
    task counts (cluster, entity_type, taxonomy, cluster_role), content
    counts (entity_type plus the cluster/taxonomy/attribute mapping models),
    and a short gap summary. With ``--detailed`` it also prints per-value
    breakdowns of ``entity_type`` and ``cluster_role`` across tasks.
    """

    help = 'Audit metadata completeness for a site (Stage 3)'

    def add_arguments(self, parser):
        """Register the optional ``--site`` and ``--detailed`` arguments."""
        parser.add_argument(
            '--site',
            type=int,
            help='Site ID to audit (if not provided, audits all sites)',
        )
        parser.add_argument(
            '--detailed',
            action='store_true',
            help='Show detailed breakdown by entity type',
        )

    def handle(self, *args, **options):
        """Resolve the sites to audit and write one report per site.

        Writes an error line and returns without auditing when the requested
        site does not exist, or when no sites exist at all.
        """
        site_id = options.get('site')
        detailed = options.get('detailed', False)

        # Compare against None (not truthiness) so an explicit ``--site 0``
        # is treated as a filter instead of silently auditing every site.
        if site_id is not None:
            sites = Site.objects.filter(id=site_id)
        else:
            sites = Site.objects.all()

        if not sites.exists():
            # Without --site there is no ID to report; avoid "Site None not found".
            if site_id is not None:
                message = f'Site {site_id} not found'
            else:
                message = 'No sites found'
            self.stdout.write(self.style.ERROR(message))
            return

        for site in sites:
            self._audit_site(site, detailed)

    @staticmethod
    def _format_ratio(count, total):
        """Return ``count/total (N%)``; N is floored and is 0 when total is 0."""
        percent = count * 100 // total if total else 0
        return f'{count}/{total} ({percent}%)'

    def _write_breakdown(self, tasks, field, heading):
        """Write ``heading`` followed by task counts grouped by ``field``.

        Groups are ordered by descending count; a falsy field value is
        shown as ``NULL``.
        """
        self.stdout.write(heading)
        rows = tasks.values(field).annotate(count=Count('id')).order_by('-count')
        for row in rows:
            self.stdout.write(f' {row[field] or "NULL"}: {row["count"]} tasks')

    def _audit_site(self, site, detailed):
        """Write the full metadata report for a single ``site``."""
        ratio = self._format_ratio
        banner = '=' * 80

        self.stdout.write(self.style.SUCCESS(f'\n{banner}'))
        self.stdout.write(self.style.SUCCESS(f'Auditing Site: {site.name} (ID: {site.id})'))
        self.stdout.write(self.style.SUCCESS(f'{banner}\n'))

        # Tasks audit
        tasks = Tasks.objects.filter(site=site)
        total_tasks = tasks.count()

        tasks_with_cluster = tasks.filter(cluster__isnull=False).count()
        tasks_with_entity_type = tasks.filter(entity_type__isnull=False).count()
        tasks_with_taxonomy = tasks.filter(taxonomy__isnull=False).count()
        tasks_with_cluster_role = tasks.filter(cluster_role__isnull=False).count()

        self.stdout.write('\n📋 Tasks Summary:')
        self.stdout.write(f' Total Tasks: {total_tasks}')
        self.stdout.write(f' With Cluster: {ratio(tasks_with_cluster, total_tasks)}')
        self.stdout.write(f' With Entity Type: {ratio(tasks_with_entity_type, total_tasks)}')
        self.stdout.write(f' With Taxonomy: {ratio(tasks_with_taxonomy, total_tasks)}')
        self.stdout.write(f' With Cluster Role: {ratio(tasks_with_cluster_role, total_tasks)}')

        # Content audit
        content = Content.objects.filter(site=site)
        total_content = content.count()

        content_with_entity_type = content.filter(entity_type__isnull=False).count()
        # Each mapping model may hold several rows per content item, so count
        # distinct content references rather than mapping rows.
        content_with_cluster_map = ContentClusterMap.objects.filter(
            content__site=site
        ).values('content').distinct().count()
        content_with_taxonomy_map = ContentTaxonomyMap.objects.filter(
            content__site=site
        ).values('content').distinct().count()
        content_with_attributes = ContentAttributeMap.objects.filter(
            content__site=site
        ).values('content').distinct().count()

        self.stdout.write('\n📄 Content Summary:')
        self.stdout.write(f' Total Content: {total_content}')
        self.stdout.write(f' With Entity Type: {ratio(content_with_entity_type, total_content)}')
        self.stdout.write(f' With Cluster Mapping: {ratio(content_with_cluster_map, total_content)}')
        self.stdout.write(f' With Taxonomy Mapping: {ratio(content_with_taxonomy_map, total_content)}')
        self.stdout.write(f' With Attributes: {ratio(content_with_attributes, total_content)}')

        # Gap analysis
        tasks_missing_cluster = tasks.filter(cluster__isnull=True).count()
        tasks_missing_entity_type = tasks.filter(entity_type__isnull=True).count()
        content_missing_cluster_map = total_content - content_with_cluster_map

        self.stdout.write('\n⚠️ Gaps:')
        self.stdout.write(f' Tasks missing cluster: {tasks_missing_cluster}')
        self.stdout.write(f' Tasks missing entity_type: {tasks_missing_entity_type}')
        self.stdout.write(f' Content missing cluster mapping: {content_missing_cluster_map}')

        if detailed:
            self._write_breakdown(tasks, 'entity_type', '\n📊 Entity Type Breakdown:')
            self._write_breakdown(tasks, 'cluster_role', '\n🎯 Cluster Role Breakdown:')

        # Trailing blank line after each site's report.
        self.stdout.write('')
|
||||
|
||||
Reference in New Issue
Block a user