Add source tracking and sync status fields to Content model; update services module

- Introduced new fields in the Content model for source tracking and sync status, including external references and optimization fields.
- Updated the services module to include new content generation and pipeline services for better organization and clarity.
2025-11-17 11:15:15 +00:00
parent fe95d09bbe
commit 9930728e8a
19 changed files with 2281 additions and 1 deletions
--- a/backend/igny8_core/business/optimization/services/__init__.py
+++ b/backend/igny8_core/business/optimization/services/__init__.py
@@ -0,0 +1,5 @@
+"""
+Optimization Services
+"""
+
+
--- a/backend/igny8_core/business/optimization/services/analyzer.py
+++ b/backend/igny8_core/business/optimization/services/analyzer.py
@@ -0,0 +1,184 @@
+"""
+Content Analyzer
+Analyzes content quality and calculates optimization scores
+"""
+import logging
+import re
+from typing import Dict
+from igny8_core.business.content.models import Content
+
+logger = logging.getLogger(__name__)
+
+
+class ContentAnalyzer:
+    """Analyzes content quality.
+
+    Produces heuristic 0-100 scores for SEO, readability and engagement from
+    a content item's HTML body and metadata fields.
+    """
+
+    # Any HTML tag; replaced by a space when reducing markup to plain text so
+    # that words in adjacent elements ("</p><p>") do not fuse together.
+    _TAG_PATTERN = re.compile(r'<[^>]+>')
+    # A run of sentence-ending punctuation.
+    _SENTENCE_END_PATTERN = re.compile(r'[.!?]+')
+
+    def analyze(self, content: Content) -> Dict:
+        """
+        Analyze content and return scores.
+
+        Args:
+            content: Content instance to analyze
+
+        Returns:
+            Dict with the scores 'seo_score', 'readability_score',
+            'engagement_score' and 'overall_score' (all rounded to 2 places),
+            plus the metadata keys 'word_count', 'has_meta_title',
+            'has_meta_description', 'has_primary_keyword' and
+            'internal_links_count'. The same keys are present when the
+            content is missing or has no HTML; the scores are then all 0.
+        """
+        summary = self._summarize_metadata(content)
+
+        if not content or not content.html_content:
+            return {
+                'seo_score': 0,
+                'readability_score': 0,
+                'engagement_score': 0,
+                'overall_score': 0,
+                **summary,
+            }
+
+        seo_score = self._calculate_seo_score(content)
+        readability_score = self._calculate_readability_score(content)
+        engagement_score = self._calculate_engagement_score(content)
+
+        # Overall score is a weighted average (weights sum to 1.0).
+        overall_score = (
+            seo_score * 0.4 +
+            readability_score * 0.3 +
+            engagement_score * 0.3
+        )
+
+        return {
+            'seo_score': round(seo_score, 2),
+            'readability_score': round(readability_score, 2),
+            'engagement_score': round(engagement_score, 2),
+            'overall_score': round(overall_score, 2),
+            **summary,
+        }
+
+    @staticmethod
+    def _summarize_metadata(content) -> Dict:
+        """Return the non-score metadata keys reported by analyze()."""
+        if not content:
+            return {
+                'word_count': 0,
+                'has_meta_title': False,
+                'has_meta_description': False,
+                'has_primary_keyword': False,
+                'internal_links_count': 0,
+            }
+        return {
+            'word_count': content.word_count or 0,
+            'has_meta_title': bool(content.meta_title),
+            'has_meta_description': bool(content.meta_description),
+            'has_primary_keyword': bool(content.primary_keyword),
+            'internal_links_count': len(content.internal_links) if content.internal_links else 0,
+        }
+
+    @staticmethod
+    def _count_tags(markup: str, *tag_names: str) -> int:
+        """Count opening tags named in tag_names.
+
+        Matches case-insensitively and accepts tags with attributes
+        (e.g. '<p class="lead">'); the lookahead keeps 'p' from matching
+        '<pre>' or '<param>'.
+        """
+        if not markup or not tag_names:
+            return 0
+        alternatives = '|'.join(re.escape(name) for name in tag_names)
+        pattern = r'<(?:%s)(?=[\s/>])' % alternatives
+        return len(re.findall(pattern, markup, flags=re.IGNORECASE))
+
+    @staticmethod
+    def _length_points(value, minimum: int, maximum: int) -> int:
+        """Return 20 if len(value) is within [minimum, maximum], 10 if merely non-empty, else 0."""
+        if not value:
+            return 0
+        return 20 if minimum <= len(value) <= maximum else 10
+
+    @staticmethod
+    def _link_points(internal_links) -> int:
+        """Return 20 for three or more internal links, 10 for at least one, else 0."""
+        link_count = len(internal_links or [])
+        if link_count >= 3:
+            return 20
+        if link_count >= 1:
+            return 10
+        return 0
+
+    def _calculate_seo_score(self, content: Content) -> float:
+        """Calculate SEO score (0-100) from five components worth 20 points each."""
+        score = 0
+
+        # Meta title (20 points) - full marks for 30-60 characters
+        score += self._length_points(content.meta_title, 30, 60)
+
+        # Meta description (20 points) - full marks for 120-160 characters
+        score += self._length_points(content.meta_description, 120, 160)
+
+        # Primary keyword (20 points)
+        if content.primary_keyword:
+            score += 20
+
+        # Word count (20 points) - optimal range 1000-2500 words
+        word_count = content.word_count or 0
+        if 1000 <= word_count <= 2500:
+            score += 20
+        elif 500 <= word_count < 1000 or 2500 < word_count <= 3000:
+            score += 15
+        elif word_count > 0:
+            score += 10
+
+        # Internal links (20 points)
+        score += self._link_points(content.internal_links)
+
+        return min(score, 100)
+
+    def _calculate_readability_score(self, content: Content) -> float:
+        """Calculate readability score (0-100).
+
+        Combines average sentence length (up to 40 points), average word
+        length (up to 30 points) and paragraph structure (up to 30 points).
+        """
+        markup = content.html_content
+        if not markup:
+            return 0
+
+        # Reduce markup to plain text; tags become spaces so neighbouring
+        # elements stay separate words.
+        text = self._TAG_PATTERN.sub(' ', markup)
+
+        # Drop tokens that are pure punctuation (left over when an inline tag
+        # sat next to a full stop) so they are not counted as words.
+        words = [
+            token for token in text.split()
+            if any(char.isalnum() for char in token)
+        ]
+        if not words:
+            return 0
+
+        # re.split leaves an empty trailing segment after the final
+        # terminator; discard blank segments so the sentence count is exact.
+        sentences = [
+            segment for segment in self._SENTENCE_END_PATTERN.split(text)
+            if segment.strip()
+        ]
+
+        # Average sentence length (optimal: 15-20 words)
+        avg_sentence_length = len(words) / max(len(sentences), 1)
+        if 15 <= avg_sentence_length <= 20:
+            sentence_score = 40
+        elif 10 <= avg_sentence_length < 15 or 20 < avg_sentence_length <= 25:
+            sentence_score = 30
+        else:
+            sentence_score = 20
+
+        # Average word length (optimal: 4-5 characters)
+        avg_word_length = sum(len(word) for word in words) / len(words)
+        if 4 <= avg_word_length <= 5:
+            word_score = 30
+        elif 3 <= avg_word_length < 4 or 5 < avg_word_length <= 6:
+            word_score = 20
+        else:
+            word_score = 10
+
+        # Paragraph structure (30 points)
+        paragraphs = self._count_tags(markup, 'p', 'div')
+        if paragraphs >= 3:
+            paragraph_score = 30
+        elif paragraphs >= 1:
+            paragraph_score = 20
+        else:
+            paragraph_score = 10
+
+        return min(sentence_score + word_score + paragraph_score, 100)
+
+    def _calculate_engagement_score(self, content: Content) -> float:
+        """Calculate engagement score (0-100).
+
+        Combines headings (30), images (30), lists (20) and internal
+        links (20).
+        """
+        score = 0
+        markup = content.html_content
+
+        if markup:
+            # Headings (30 points)
+            h1_count = self._count_tags(markup, 'h1')
+            h2_count = self._count_tags(markup, 'h2')
+            h3_count = self._count_tags(markup, 'h3')
+
+            if h1_count >= 1 and h2_count >= 2:
+                score += 30
+            elif h1_count >= 1 or h2_count >= 1:
+                score += 20
+            elif h3_count >= 1:
+                score += 10
+
+            # Lists (20 points)
+            list_count = self._count_tags(markup, 'ul', 'ol')
+            if list_count >= 2:
+                score += 20
+            elif list_count >= 1:
+                score += 10
+
+        # Images (30 points) - only scored when the object exposes an
+        # 'images' attribute providing count().
+        if hasattr(content, 'images'):
+            image_count = content.images.count()
+            if image_count >= 3:
+                score += 30
+            elif image_count >= 1:
+                score += 20
+
+        # Internal links (20 points)
+        score += self._link_points(content.internal_links)
+
+        return min(score, 100)
+
+
--- a/backend/igny8_core/business/optimization/services/optimizer_service.py
+++ b/backend/igny8_core/business/optimization/services/optimizer_service.py
@@ -0,0 +1,216 @@
+"""
+Optimizer Service
+Main service for content optimization with multiple entry points
+"""
+import logging
+from typing import Optional
+from igny8_core.business.content.models import Content
+from igny8_core.business.optimization.models import OptimizationTask
+from igny8_core.business.optimization.services.analyzer import ContentAnalyzer
+from igny8_core.business.billing.services.credit_service import CreditService
+from igny8_core.business.billing.exceptions import InsufficientCreditsError
+
+logger = logging.getLogger(__name__)
+
+
+class OptimizerService:
+    """Service for content optimization with multiple entry points.
+
+    Each optimize_from_* / optimize_manual method looks up a Content row
+    (optionally restricted by its source) and hands it to optimize(), which
+    holds the shared optimization logic.
+    """
+
+    def __init__(self):
+        self.analyzer = ContentAnalyzer()
+        self.credit_service = CreditService()
+
+    def _get_content(self, content_id: int, description: str, **filters) -> Content:
+        """
+        Fetch a Content row by id, optionally narrowed by extra filters.
+
+        Args:
+            content_id: Primary key of the content
+            description: Leading text of the error message (e.g. "WordPress content")
+            **filters: Additional lookup arguments passed to Content.objects.get
+
+        Returns:
+            The matching Content instance
+
+        Raises:
+            ValueError: If no matching content exists
+        """
+        try:
+            return Content.objects.get(id=content_id, **filters)
+        except Content.DoesNotExist as exc:
+            raise ValueError(f"{description} with id {content_id} does not exist") from exc
+
+    def optimize_from_writer(self, content_id: int) -> Content:
+        """
+        Entry Point 1: Writer → Optimizer
+
+        Args:
+            content_id: Content ID from Writer module
+
+        Returns:
+            Optimized Content instance
+
+        Raises:
+            ValueError: If no content with source 'igny8' has this id
+        """
+        content = self._get_content(content_id, "IGNY8 content", source='igny8')
+        return self.optimize(content)
+
+    def optimize_from_wordpress_sync(self, content_id: int) -> Content:
+        """
+        Entry Point 2: WordPress Sync → Optimizer
+
+        Args:
+            content_id: Content ID synced from WordPress
+
+        Returns:
+            Optimized Content instance
+
+        Raises:
+            ValueError: If no content with source 'wordpress' has this id
+        """
+        content = self._get_content(content_id, "WordPress content", source='wordpress')
+        return self.optimize(content)
+
+    def optimize_from_external_sync(self, content_id: int) -> Content:
+        """
+        Entry Point 3: External Sync → Optimizer (Shopify, custom APIs)
+
+        Args:
+            content_id: Content ID synced from external source
+
+        Returns:
+            Optimized Content instance
+
+        Raises:
+            ValueError: If no content with source 'shopify' or 'custom' has this id
+        """
+        content = self._get_content(
+            content_id, "External content", source__in=['shopify', 'custom']
+        )
+        return self.optimize(content)
+
+    def optimize_manual(self, content_id: int) -> Content:
+        """
+        Entry Point 4: Manual Selection → Optimizer
+
+        Args:
+            content_id: Content ID selected manually
+
+        Returns:
+            Optimized Content instance
+
+        Raises:
+            ValueError: If no content has this id
+        """
+        content = self._get_content(content_id, "Content")
+        return self.optimize(content)
+
+    def optimize(self, content: Content) -> Content:
+        """
+        Unified optimization logic (used by all entry points).
+
+        Checks credits, records an OptimizationTask with the before/after
+        scores and HTML, updates the content, then deducts credits. If any
+        step after task creation fails, the task is marked 'failed' and the
+        exception is re-raised.
+
+        Args:
+            content: Content instance to optimize
+
+        Returns:
+            Optimized Content instance
+
+        Raises:
+            InsufficientCreditsError: If account doesn't have enough credits
+        """
+        account = content.account
+        word_count = content.word_count or 0
+
+        # Check credits; InsufficientCreditsError propagates to the caller.
+        self.credit_service.check_credits(account, 'optimization', word_count)
+
+        # Analyze content before optimization
+        scores_before = self.analyzer.analyze(content)
+        html_before = content.html_content
+
+        # Create optimization task
+        task = OptimizationTask.objects.create(
+            content=content,
+            scores_before=scores_before,
+            status='running',
+            html_before=html_before,
+            account=account
+        )
+
+        try:
+            # Delegate to AI function (actual optimization happens in Celery/AI task)
+            # For now, we'll do a simple optimization pass
+            # In production, this would call the AI function
+            optimized_content = self._optimize_content(content, scores_before)
+
+            # Analyze optimized content
+            scores_after = self.analyzer.analyze(optimized_content)
+            overall_before = scores_before.get('overall_score', 0)
+            overall_after = scores_after.get('overall_score', 0)
+
+            # Calculate credits used
+            credits_used = self.credit_service.get_credit_cost('optimization', word_count)
+
+            # Update optimization task
+            task.scores_after = scores_after
+            task.html_after = optimized_content.html_content
+            task.status = 'completed'
+            task.credits_used = credits_used
+            task.save()
+
+            # Update content. Treat a missing version as 0 so the first
+            # optimization does not fail with a TypeError.
+            content.html_content = optimized_content.html_content
+            content.optimizer_version = (content.optimizer_version or 0) + 1
+            content.optimization_scores = scores_after
+            content.save(update_fields=['html_content', 'optimizer_version', 'optimization_scores'])
+
+            # Deduct credits
+            # NOTE(review): the content is saved before credits are deducted and
+            # nothing here runs in a transaction, so a failed deduction leaves
+            # the content updated while the task is marked 'failed' - confirm
+            # whether this should be wrapped in an atomic block.
+            self.credit_service.deduct_credits_for_operation(
+                account=account,
+                operation_type='optimization',
+                amount=word_count,
+                description=f"Content optimization: {content.title or 'Untitled'}",
+                related_object_type='content',
+                related_object_id=content.id,
+                metadata={
+                    'scores_before': scores_before,
+                    'scores_after': scores_after,
+                    # Rounded to avoid float noise such as 0.30000000000000004.
+                    'improvement': round(overall_after - overall_before, 2)
+                }
+            )
+
+            logger.info(
+                "Optimized content %s: %s → %s",
+                content.id, overall_before, overall_after
+            )
+
+            return content
+
+        except Exception as e:
+            # Boundary handler: record the failure on the task, then re-raise
+            # so callers still see the original error.
+            logger.exception("Error optimizing content %s: %s", content.id, e)
+            task.status = 'failed'
+            task.metadata = {'error': str(e)}
+            task.save()
+            raise
+
+    def _optimize_content(self, content: Content, scores_before: dict) -> Content:
+        """
+        Internal method to optimize content.
+        This is a placeholder - in production, this would call the AI function.
+
+        Args:
+            content: Content to optimize
+            scores_before: Scores before optimization (currently unused)
+
+        Returns:
+            The same Content instance, unchanged
+        """
+        # For now, return content as-is
+        # In production, this would:
+        # 1. Call OptimizeContentFunction AI function
+        # 2. Get optimized HTML
+        # 3. Update content
+
+        # Placeholder: We'll implement AI function call later
+        return content
+
+    def analyze_only(self, content_id: int) -> dict:
+        """
+        Analyze content without optimizing (for preview).
+
+        Args:
+            content_id: Content ID to analyze
+
+        Returns:
+            Analysis scores dict
+
+        Raises:
+            ValueError: If no content has this id
+        """
+        content = self._get_content(content_id, "Content")
+        return self.analyzer.analyze(content)
+
+