Refactor content handling in GenerateContentFunction and update related models and serializers

- Enhanced GenerateContentFunction to save content in a dedicated Content model, separating it from the Tasks model.
- Updated Tasks model to remove SEO-related fields, now managed in the Content model.
- Modified TasksSerializer to include new content fields and adjusted the API to reflect these changes.
- Improved the auto_generate_content_task method to utilize the new save_output method for better content management.
- Updated frontend components to display new content structure and metadata effectively.
This commit is contained in:
IGNY8 VPS (Salman)
2025-11-10 14:06:15 +00:00
parent 8bb4c5d016
commit 8b6e18649c
11 changed files with 596 additions and 356 deletions

View File

@@ -7,7 +7,7 @@ import re
from typing import Dict, List, Any
from django.db import transaction
from igny8_core.ai.base import BaseAIFunction
from igny8_core.modules.writer.models import Tasks
from igny8_core.modules.writer.models import Tasks, Content as TaskContent
from igny8_core.ai.ai_core import AICore
from igny8_core.ai.validators import validate_tasks_exist
from igny8_core.ai.prompts import PromptRegistry
@@ -188,69 +188,111 @@ class GenerateContentFunction(BaseAIFunction):
# Handle parsed response - can be dict (JSON) or string (plain text)
if isinstance(parsed, dict):
# JSON response with structured fields
content = parsed.get('content', '')
title = parsed.get('title', task.title)
meta_title = parsed.get('meta_title', title or task.title)
content_html = parsed.get('content', '')
title = parsed.get('title') or task.title
meta_title = parsed.get('meta_title') or title or task.title
meta_description = parsed.get('meta_description', '')
word_count = parsed.get('word_count', 0)
primary_keyword = parsed.get('primary_keyword', '')
secondary_keywords = parsed.get('secondary_keywords', [])
tags = parsed.get('tags', [])
categories = parsed.get('categories', [])
content_status = parsed.get('status', 'draft')
else:
# Plain text response (legacy)
content = str(parsed)
content_html = str(parsed)
title = task.title
meta_title = task.title
meta_description = (task.description or '')[:160] if task.description else ''
meta_title = task.meta_title or task.title
meta_description = task.meta_description or (task.description or '')[:160] if task.description else ''
word_count = 0
primary_keyword = ''
secondary_keywords = []
tags = []
categories = []
content_status = 'draft'
# Calculate word count if not provided
if not word_count and content:
text_for_counting = re.sub(r'<[^>]+>', '', content)
if not word_count and content_html:
text_for_counting = re.sub(r'<[^>]+>', '', content_html)
word_count = len(text_for_counting.split())
# Update task with all fields
if content:
task.content = content
if title and title != task.title:
task.title = title
task.word_count = word_count
# SEO fields
if meta_title:
task.meta_title = meta_title
elif not task.meta_title:
task.meta_title = task.title # Fallback to title
if meta_description:
task.meta_description = meta_description
elif not task.meta_description and task.description:
task.meta_description = (task.description or '')[:160] # Fallback to description
if primary_keyword:
task.primary_keyword = primary_keyword
if secondary_keywords:
task.secondary_keywords = secondary_keywords if isinstance(secondary_keywords, list) else []
if tags:
task.tags = tags if isinstance(tags, list) else []
if categories:
task.categories = categories if isinstance(categories, list) else []
task.status = 'draft'
task.save()
# Ensure related content record exists
content_record, _created = TaskContent.objects.get_or_create(
task=task,
defaults={
'account': task.account,
'site': task.site,
'sector': task.sector,
'html_content': content_html or '',
'word_count': word_count or 0,
'status': 'draft',
},
)
# Update content fields
if content_html:
content_record.html_content = content_html
content_record.word_count = word_count or content_record.word_count or 0
content_record.title = title
content_record.meta_title = meta_title
content_record.meta_description = meta_description
content_record.primary_keyword = primary_keyword or ''
if isinstance(secondary_keywords, list):
content_record.secondary_keywords = secondary_keywords
elif secondary_keywords:
content_record.secondary_keywords = [secondary_keywords]
else:
content_record.secondary_keywords = []
if isinstance(tags, list):
content_record.tags = tags
elif tags:
content_record.tags = [tags]
else:
content_record.tags = []
if isinstance(categories, list):
content_record.categories = categories
elif categories:
content_record.categories = [categories]
else:
content_record.categories = []
content_record.status = content_status or 'draft'
# Merge any extra fields into metadata (non-standard keys)
if isinstance(parsed, dict):
excluded_keys = {
'content',
'title',
'meta_title',
'meta_description',
'primary_keyword',
'secondary_keywords',
'tags',
'categories',
'word_count',
'status',
}
extra_meta = {k: v for k, v in parsed.items() if k not in excluded_keys}
existing_meta = content_record.metadata or {}
existing_meta.update(extra_meta)
content_record.metadata = existing_meta
# Align foreign keys to ensure consistency
content_record.account = task.account
content_record.site = task.site
content_record.sector = task.sector
content_record.task = task
content_record.save()
# Update task status - keep task data intact but mark as completed
task.status = 'completed'
task.save(update_fields=['status', 'updated_at'])
return {
'count': 1,
'tasks_updated': 1,
'word_count': word_count
'word_count': content_record.word_count,
}

View File

@@ -1,7 +1,6 @@
# Generated manually for adding seed_keyword relationship to Keywords
from django.db import migrations, models
import django.db.models.deletion
from django.db import migrations
class Migration(migrations.Migration):
@@ -11,76 +10,7 @@ class Migration(migrations.Migration):
('planner', '0003_alter_clusters_sector_alter_clusters_site_and_more'),
]
operations = [
# Remove old fields (keyword, volume, difficulty, intent)
migrations.RemoveField(
model_name='keywords',
name='keyword',
),
migrations.RemoveField(
model_name='keywords',
name='volume',
),
migrations.RemoveField(
model_name='keywords',
name='difficulty',
),
migrations.RemoveField(
model_name='keywords',
name='intent',
),
# Add seed_keyword FK
migrations.AddField(
model_name='keywords',
name='seed_keyword',
field=models.ForeignKey(
help_text='Reference to the global seed keyword',
on_delete=django.db.models.deletion.PROTECT,
related_name='site_keywords',
to='igny8_core_auth.seedkeyword',
null=True # Temporarily nullable for migration
),
),
# Add override fields
migrations.AddField(
model_name='keywords',
name='volume_override',
field=models.IntegerField(blank=True, help_text='Site-specific volume override (uses seed_keyword.volume if not set)', null=True),
),
migrations.AddField(
model_name='keywords',
name='difficulty_override',
field=models.IntegerField(blank=True, help_text='Site-specific difficulty override (uses seed_keyword.difficulty if not set)', null=True),
),
# Make seed_keyword required (after data migration if needed)
migrations.AlterField(
model_name='keywords',
name='seed_keyword',
field=models.ForeignKey(
help_text='Reference to the global seed keyword',
on_delete=django.db.models.deletion.PROTECT,
related_name='site_keywords',
to='igny8_core_auth.seedkeyword'
),
),
# Add unique constraint
migrations.AlterUniqueTogether(
name='keywords',
unique_together={('seed_keyword', 'site', 'sector')},
),
# Update indexes
migrations.AlterIndexTogether(
name='keywords',
index_together=set(),
),
# Add new indexes
migrations.AddIndex(
model_name='keywords',
index=models.Index(fields=['seed_keyword'], name='igny8_keyw_seed_k_12345_idx'),
),
migrations.AddIndex(
model_name='keywords',
index=models.Index(fields=['seed_keyword', 'site', 'sector'], name='igny8_keyw_seed_si_67890_idx'),
),
]
# Duplicate of planner.0006_add_seed_keyword_to_keywords.
# This migration is kept as a no-op to avoid applying the schema changes twice.
operations = []

View File

@@ -0,0 +1,171 @@
from django.db import migrations, models
def _normalize_list(value):
if not value:
return []
if isinstance(value, list):
return value
if isinstance(value, tuple):
return list(value)
return [value]
def forwards(apps, schema_editor):
    """Backfill writer.Content rows from the legacy fields on writer.Tasks.

    For every Tasks row, ensure a Content record exists (looked up by
    task_id) and mirror the task's html content, word count, title/meta
    fields, keywords and taxonomy lists onto it.  Designed to be re-run
    safely: existing Content rows are updated field-by-field, and only
    where the task actually carries data.
    """
    Tasks = apps.get_model('writer', 'Tasks')
    Content = apps.get_model('writer', 'Content')
    for task in Tasks.objects.all():
        # Resolve FK ids defensively: prefer the raw *_id attribute and
        # fall back to the related object only when the id is missing.
        account_id = getattr(task, 'account_id', None)
        if account_id is None and getattr(task, 'account', None):
            account_id = task.account.id
        site_id = getattr(task, 'site_id', None)
        if site_id is None and getattr(task, 'site', None):
            site_id = task.site.id
        sector_id = getattr(task, 'sector_id', None)
        if sector_id is None and getattr(task, 'sector', None):
            sector_id = task.sector.id if task.sector else None
        tenant_id = getattr(task, 'tenant_id', None)
        if tenant_id is None and getattr(task, 'tenant', None):
            tenant_id = task.tenant.id
        # Prepare defaults for new content record.  getattr() guards keep
        # this working even if a field was already dropped from Tasks.
        defaults = {
            'html_content': task.content or '',
            'word_count': task.word_count or 0,
            'title': getattr(task, 'title', None),
            'meta_title': getattr(task, 'meta_title', None),
            'meta_description': getattr(task, 'meta_description', None),
            'primary_keyword': getattr(task, 'primary_keyword', None),
            'secondary_keywords': _normalize_list(getattr(task, 'secondary_keywords', [])),
            'tags': _normalize_list(getattr(task, 'tags', [])),
            'categories': _normalize_list(getattr(task, 'categories', [])),
            'status': 'draft',
        }
        content_record = Content.objects.filter(task_id=task.id).first()
        created = False
        if not content_record:
            content_record = Content(task_id=task.id, **defaults)
            created = True
        # Update existing records with the latest information (new records
        # already received everything via `defaults`).
        if not created:
            if task.content:
                content_record.html_content = task.content
            if task.word_count:
                content_record.word_count = task.word_count
            if getattr(task, 'title', None):
                content_record.title = task.title
            if getattr(task, 'meta_title', None):
                content_record.meta_title = task.meta_title
            if getattr(task, 'meta_description', None):
                content_record.meta_description = task.meta_description
            if hasattr(task, 'primary_keyword'):
                content_record.primary_keyword = task.primary_keyword or ''
            if hasattr(task, 'secondary_keywords'):
                content_record.secondary_keywords = _normalize_list(task.secondary_keywords)
            if hasattr(task, 'tags'):
                content_record.tags = _normalize_list(task.tags)
            if hasattr(task, 'categories'):
                content_record.categories = _normalize_list(task.categories)
            if not content_record.status:
                content_record.status = 'draft'
        # Ensure account/site/sector alignment (save() will also set this).
        # Applies to created rows too, since `defaults` carries no FK ids.
        if account_id:
            content_record.account_id = account_id
        if site_id:
            content_record.site_id = site_id
        if sector_id:
            content_record.sector_id = sector_id
        if tenant_id:
            content_record.tenant_id = tenant_id
        # Preserve existing metadata but ensure it's a dict
        metadata = content_record.metadata or {}
        content_record.metadata = metadata
        content_record.save()
def backwards(apps, schema_editor):
    """Intentional no-op reverse step.

    Undoing this data migration would require the SEO fields that the
    schema operations remove from Tasks, so nothing can be restored here.
    """
    pass
class Migration(migrations.Migration):
    """Move SEO/taxonomy fields from Tasks onto Content, with data backfill.

    Operation order matters: the new columns are added to Content first,
    then forwards() copies the legacy values across, and only afterwards
    are the migrated columns dropped from Tasks.
    """

    dependencies = [
        ('writer', '0004_add_content_seo_fields'),
    ]

    operations = [
        # 1) New SEO/taxonomy columns on Content.
        migrations.AddField(
            model_name='content',
            name='categories',
            field=models.JSONField(blank=True, default=list, help_text='List of categories'),
        ),
        migrations.AddField(
            model_name='content',
            name='meta_description',
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='content',
            name='meta_title',
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        migrations.AddField(
            model_name='content',
            name='primary_keyword',
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        migrations.AddField(
            model_name='content',
            name='secondary_keywords',
            field=models.JSONField(blank=True, default=list, help_text='List of secondary keywords'),
        ),
        migrations.AddField(
            model_name='content',
            name='status',
            field=models.CharField(default='draft', help_text='Content workflow status (draft, review, published, etc.)', max_length=50),
        ),
        migrations.AddField(
            model_name='content',
            name='tags',
            field=models.JSONField(blank=True, default=list, help_text='List of tags'),
        ),
        migrations.AddField(
            model_name='content',
            name='title',
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        # 2) Copy legacy values from Tasks into Content (reads the columns
        #    removed below, so this must run before the RemoveField ops).
        migrations.RunPython(forwards, backwards),
        # 3) Drop the migrated columns from Tasks.
        # NOTE(review): meta_title/meta_description are NOT removed here;
        # only the keyword/taxonomy fields move exclusively to Content.
        migrations.RemoveField(
            model_name='tasks',
            name='categories',
        ),
        migrations.RemoveField(
            model_name='tasks',
            name='primary_keyword',
        ),
        migrations.RemoveField(
            model_name='tasks',
            name='secondary_keywords',
        ),
        migrations.RemoveField(
            model_name='tasks',
            name='tags',
        ),
    ]

View File

@@ -65,11 +65,6 @@ class Tasks(SiteSectorBaseModel):
# SEO fields
meta_title = models.CharField(max_length=255, blank=True, null=True)
meta_description = models.TextField(blank=True, null=True)
primary_keyword = models.CharField(max_length=255, blank=True, null=True)
secondary_keywords = models.JSONField(default=list, blank=True, help_text="List of secondary keywords")
tags = models.JSONField(default=list, blank=True, help_text="List of tags")
categories = models.JSONField(default=list, blank=True, help_text="List of categories")
# WordPress integration
assigned_post_id = models.IntegerField(null=True, blank=True) # WordPress post ID if published
post_url = models.URLField(blank=True, null=True) # WordPress post URL
@@ -108,6 +103,14 @@ class Content(SiteSectorBaseModel):
html_content = models.TextField(help_text="Final AI-generated HTML content")
word_count = models.IntegerField(default=0, validators=[MinValueValidator(0)])
metadata = models.JSONField(default=dict, help_text="Additional metadata (SEO, structure, etc.)")
title = models.CharField(max_length=255, blank=True, null=True)
meta_title = models.CharField(max_length=255, blank=True, null=True)
meta_description = models.TextField(blank=True, null=True)
primary_keyword = models.CharField(max_length=255, blank=True, null=True)
secondary_keywords = models.JSONField(default=list, blank=True, help_text="List of secondary keywords")
tags = models.JSONField(default=list, blank=True, help_text="List of tags")
categories = models.JSONField(default=list, blank=True, help_text="List of categories")
status = models.CharField(max_length=50, default='draft', help_text="Content workflow status (draft, review, published, etc.)")
generated_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)

View File

@@ -10,6 +10,11 @@ class TasksSerializer(serializers.ModelSerializer):
idea_title = serializers.SerializerMethodField()
site_id = serializers.IntegerField(write_only=True, required=False)
sector_id = serializers.IntegerField(write_only=True, required=False)
content_html = serializers.SerializerMethodField()
content_primary_keyword = serializers.SerializerMethodField()
content_secondary_keywords = serializers.SerializerMethodField()
content_tags = serializers.SerializerMethodField()
content_categories = serializers.SerializerMethodField()
class Meta:
model = Tasks
@@ -30,10 +35,11 @@ class TasksSerializer(serializers.ModelSerializer):
'word_count',
'meta_title',
'meta_description',
'primary_keyword',
'secondary_keywords',
'tags',
'categories',
'content_html',
'content_primary_keyword',
'content_secondary_keywords',
'content_tags',
'content_categories',
'assigned_post_id',
'post_url',
'created_at',
@@ -75,6 +81,32 @@ class TasksSerializer(serializers.ModelSerializer):
return None
return None
def _get_content_record(self, obj):
    """Return the task's related Content record, or None when absent.

    getattr() with a default is equivalent to wrapping the attribute
    access in try/except AttributeError.
    """
    return getattr(obj, 'content_record', None)
def get_content_html(self, obj):
    """HTML body from the related Content record, or None if no record."""
    record = self._get_content_record(obj)
    if not record:
        return None
    return record.html_content
def get_content_primary_keyword(self, obj):
    """Primary keyword from the related Content record, or None if no record."""
    record = self._get_content_record(obj)
    if not record:
        return None
    return record.primary_keyword
def get_content_secondary_keywords(self, obj):
    """Secondary keyword list from the related Content record ([] if no record)."""
    record = self._get_content_record(obj)
    if not record:
        return []
    return record.secondary_keywords
def get_content_tags(self, obj):
    """Tag list from the related Content record ([] if no record)."""
    record = self._get_content_record(obj)
    if not record:
        return []
    return record.tags
def get_content_categories(self, obj):
    """Category list from the related Content record ([] if no record)."""
    record = self._get_content_record(obj)
    if not record:
        return []
    return record.categories
class ImagesSerializer(serializers.ModelSerializer):
"""Serializer for Images model"""
@@ -122,6 +154,14 @@ class ContentSerializer(serializers.ModelSerializer):
'html_content',
'word_count',
'metadata',
'title',
'meta_title',
'meta_description',
'primary_keyword',
'secondary_keywords',
'tags',
'categories',
'status',
'generated_at',
'updated_at',
'account_id',

View File

@@ -632,103 +632,122 @@ def auto_generate_content_task(self, task_ids: List[int], account_id: int = None
}
)
# Normalize content from different AI response formats
logger.info(f" * Normalizing content (length: {len(content)} chars)...")
# Parse JSON response using GenerateContentFunction's parse_response method
logger.info(f" * Parsing AI response (length: {len(content)} chars)...")
try:
from igny8_core.utils.content_normalizer import normalize_content
normalized = normalize_content(content)
normalized_content = normalized['normalized_content']
content_type = normalized['content_type']
has_structure = normalized['has_structure']
original_format = normalized['original_format']
from igny8_core.ai.functions.generate_content import GenerateContentFunction
fn = GenerateContentFunction()
parsed_response = fn.parse_response(content)
logger.info(f" * ✓ Content normalized:")
logger.info(f" - Original format: {original_format}")
logger.info(f" - Content type: {content_type}")
logger.info(f" - Has structure: {has_structure}")
logger.info(f" - Normalized length: {len(normalized_content)} chars")
logger.info(f" - Normalized preview (first 200 chars): {normalized_content[:200]}...")
logger.info(f" * ✓ Response parsed:")
logger.info(f" - Type: {type(parsed_response).__name__}")
if isinstance(parsed_response, dict):
logger.info(f" - Keys: {list(parsed_response.keys())}")
logger.info(f" - Has title: {bool(parsed_response.get('title'))}")
logger.info(f" - Has meta_title: {bool(parsed_response.get('meta_title'))}")
logger.info(f" - Has primary_keyword: {bool(parsed_response.get('primary_keyword'))}")
logger.info(f" - Has secondary_keywords: {bool(parsed_response.get('secondary_keywords'))}")
logger.info(f" - Has tags: {bool(parsed_response.get('tags'))}")
logger.info(f" - Has categories: {bool(parsed_response.get('categories'))}")
logger.info(f" - Content length: {len(parsed_response.get('content', ''))} chars")
else:
logger.info(f" - Content length: {len(str(parsed_response))} chars")
# Use normalized content
content = normalized_content
# Use parsed response for saving
parsed_data = parsed_response
except Exception as norm_error:
logger.warning(f" * ⚠️ Content normalization failed: {type(norm_error).__name__}: {str(norm_error)}")
logger.warning(f" * Using original content as-is")
# Continue with original content
except Exception as parse_error:
logger.warning(f" * ⚠️ JSON parsing failed: {type(parse_error).__name__}: {str(parse_error)}")
logger.warning(f" * Treating as plain text content")
# Fallback to plain text
parsed_data = {'content': content}
# Normalize content from parsed response
content_to_normalize = parsed_data.get('content', '') if isinstance(parsed_data, dict) else str(parsed_data)
if content_to_normalize:
logger.info(f" * Normalizing content (length: {len(content_to_normalize)} chars)...")
try:
from igny8_core.utils.content_normalizer import normalize_content
normalized = normalize_content(content_to_normalize)
normalized_content = normalized['normalized_content']
content_type = normalized['content_type']
has_structure = normalized['has_structure']
original_format = normalized['original_format']
logger.info(f" * ✓ Content normalized:")
logger.info(f" - Original format: {original_format}")
logger.info(f" - Content type: {content_type}")
logger.info(f" - Has structure: {has_structure}")
logger.info(f" - Normalized length: {len(normalized_content)} chars")
logger.info(f" - Normalized preview (first 200 chars): {normalized_content[:200]}...")
# Update parsed_data with normalized content
if isinstance(parsed_data, dict):
parsed_data['content'] = normalized_content
else:
parsed_data = {'content': normalized_content}
except Exception as norm_error:
logger.warning(f" * ⚠️ Content normalization failed: {type(norm_error).__name__}: {str(norm_error)}")
logger.warning(f" * Using original content as-is")
# Continue with original content
except Exception as ai_error:
logger.error(f" * ✗ EXCEPTION during AI API call: {type(ai_error).__name__}: {str(ai_error)}")
logger.error(f" * Task ID: {task.id}", exc_info=True)
continue
# Calculate word count from normalized content
# Remove HTML tags for word count
text_for_counting = re.sub(r'<[^>]+>', '', content)
word_count = len(text_for_counting.split())
logger.info(f" * ✓ Word count calculated: {word_count} words (from normalized HTML)")
# Update progress: Saving content
add_step('SAVE', 'success', f"Saving content for '{task.title}' ({word_count} words)...", 'request')
save_pct = 85 + int((idx / total_tasks) * 10) # 85-95% for saving
self.update_state(
state='PROGRESS',
meta={
'current': idx + 1,
'total': total_tasks,
'percentage': save_pct,
'message': f"Saving content for '{task.title}' ({word_count} words)...",
'phase': 'SAVE',
'current_item': task.title,
'request_steps': request_steps,
'response_steps': response_steps
}
)
# ========================================================================
# DATABASE SAVE PHASE - Detailed logging
# ========================================================================
logger.info(" - Saving content to database...")
# Use GenerateContentFunction's save_output method to properly save all fields
logger.info(" - Saving content to database using GenerateContentFunction.save_output()...")
try:
# Update task
logger.info(f" * Updating task {task.id} fields...")
task.content = content
logger.info(f" - content: {len(content)} chars")
from igny8_core.ai.functions.generate_content import GenerateContentFunction
fn = GenerateContentFunction()
task.word_count = word_count
# Save using the proper save_output method which handles all fields
save_result = fn.save_output(parsed_data, [task], account)
# Get word count from save result or calculate
word_count = save_result.get('word_count', 0)
if not word_count and isinstance(parsed_data, dict):
content_for_count = parsed_data.get('content', '')
if content_for_count:
text_for_counting = re.sub(r'<[^>]+>', '', content_for_count)
word_count = len(text_for_counting.split())
logger.info(f" * ✓ Task saved successfully using save_output()")
logger.info(f" - tasks_updated: {save_result.get('tasks_updated', 0)}")
logger.info(f" - word_count: {word_count}")
task.meta_title = task.title # Use title as meta title for now
logger.info(f" - meta_title: {task.title}")
# Log all fields that were saved
logger.info(f" * Saved fields:")
logger.info(f" - task_id: {task.id}")
logger.info(f" - task_status: {task.status}")
if isinstance(parsed_data, dict):
logger.info(f" - content_title: {parsed_data.get('title') or task.title}")
logger.info(f" - content_primary_keyword: {parsed_data.get('primary_keyword') or 'N/A'}")
logger.info(f" - content_secondary_keywords: {len(parsed_data.get('secondary_keywords') or [])} items")
logger.info(f" - content_tags: {len(parsed_data.get('tags') or [])} items")
logger.info(f" - content_categories: {len(parsed_data.get('categories') or [])} items")
logger.info(f" - content_word_count: {word_count}")
task.meta_description = (task.description or '')[:160] # Truncate to 160 chars
logger.info(f" - meta_description: {len(task.meta_description)} chars")
# Update progress: Saving content
add_step('SAVE', 'success', f"Content saved for '{task.title}' ({word_count} words)...", 'response')
save_pct = 85 + int((idx / total_tasks) * 10) # 85-95% for saving
self.update_state(
state='PROGRESS',
meta={
'current': idx + 1,
'total': total_tasks,
'percentage': save_pct,
'message': f"Content saved for '{task.title}' ({word_count} words)...",
'phase': 'SAVE',
'current_item': task.title,
'request_steps': request_steps,
'response_steps': response_steps
}
)
old_status = task.status
task.status = 'draft' # Update status from queued to draft
logger.info(f" - status: {old_status}{task.status}")
# Log all fields being saved
logger.info(f" * Task fields to save:")
logger.info(f" - id: {task.id}")
logger.info(f" - title: {task.title}")
logger.info(f" - account_id: {task.account_id}")
logger.info(f" - site_id: {task.site_id}")
logger.info(f" - sector_id: {task.sector_id}")
logger.info(f" - cluster_id: {task.cluster_id}")
logger.info(f" - idea_id: {task.idea_id}")
logger.info(f" - content length: {len(task.content)}")
logger.info(f" - word_count: {task.word_count}")
# Save to database
logger.info(f" * Executing task.save()...")
task.save()
logger.info(f" * ✓ Task saved successfully to database")
# Mark save step as complete
add_step('SAVE', 'success', f"Content saved for '{task.title}'", 'response')
tasks_updated += 1
tasks_updated += save_result.get('tasks_updated', 0)
logger.info(f" * ✓ Task {task.id} content generation completed successfully")
except Exception as save_error:

View File

@@ -13,7 +13,7 @@ class TasksViewSet(SiteSectorModelViewSet):
"""
ViewSet for managing tasks with CRUD operations
"""
queryset = Tasks.objects.all()
queryset = Tasks.objects.select_related('content_record')
serializer_class = TasksSerializer
pagination_class = CustomPageNumberPagination # Explicitly use custom pagination