Add source tracking and sync status fields to Content model; update services module
- Introduced new fields in the Content model for source tracking and sync status, including external references and optimization fields. - Updated the services module to include new content generation and pipeline services for better organization and clarity.
This commit is contained in:
6
backend/igny8_core/business/linking/__init__.py
Normal file
6
backend/igny8_core/business/linking/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Linking Business Logic
|
||||
Phase 4: Linker & Optimizer
|
||||
"""
|
||||
|
||||
|
||||
5
backend/igny8_core/business/linking/services/__init__.py
Normal file
5
backend/igny8_core/business/linking/services/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
Linking Services
|
||||
"""
|
||||
|
||||
|
||||
117
backend/igny8_core/business/linking/services/candidate_engine.py
Normal file
117
backend/igny8_core/business/linking/services/candidate_engine.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Link Candidate Engine
|
||||
Finds relevant content for internal linking
|
||||
"""
|
||||
import logging
|
||||
from typing import List, Dict
|
||||
from django.db import models
|
||||
from igny8_core.business.content.models import Content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CandidateEngine:
    """Find and rank existing content that a given article could link to.

    Candidates are drawn from content sharing the source's account, site and
    sector, then scored by keyword, category, tag and recency overlap.

    NOTE(review): ``secondary_keywords``, ``categories`` and ``tags`` are
    passed to ``set()``; this presumes they are iterables of hashable values
    (e.g. JSON lists of strings) -- confirm against the Content model.
    """

    # Scoring weights. Kept as class attributes so a subclass or caller can
    # tune ranking without editing the scoring loop.
    PRIMARY_KEYWORD_WEIGHT = 30
    SECONDARY_KEYWORD_WEIGHT = 10
    CATEGORY_WEIGHT = 5
    TAG_WEIGHT = 3
    RECENCY_BONUS = 5
    RECENCY_WINDOW_DAYS = 30

    # Upper bound on rows pulled from the database before scoring.
    MAX_POOL_SIZE = 50

    # Content statuses that are eligible as link targets.
    LINKABLE_STATUSES = ('draft', 'review', 'publish')

    def find_candidates(self, content: Content, max_candidates: int = 10) -> List[Dict]:
        """
        Find link candidates for a piece of content.

        Args:
            content: Content instance to find links for
            max_candidates: Maximum number of candidates to return

        Returns:
            List of candidate dicts with: {'content_id', 'title', 'url',
            'relevance_score', 'anchor_text'}, best score first. Empty when
            the content is missing, has no HTML, or ``max_candidates`` is
            not positive.
        """
        if not content or not content.html_content or max_candidates <= 0:
            return []

        pool = self._find_relevant_content(content)
        ranked = self._score_candidates(content, pool)

        # list.sort is stable, so equally-scored candidates keep pool order.
        ranked.sort(key=lambda item: item.get('relevance_score', 0), reverse=True)
        return ranked[:max_candidates]

    def _find_relevant_content(self, content: Content) -> List[Content]:
        """Return up to MAX_POOL_SIZE other items from the same account/site/sector.

        When the source has a primary keyword, the pool is narrowed to items
        whose primary or secondary keywords contain it (case-insensitive).
        """
        queryset = Content.objects.filter(
            account=content.account,
            site=content.site,
            sector=content.sector,
            status__in=self.LINKABLE_STATUSES,
        ).exclude(id=content.id)

        if content.primary_keyword:
            queryset = queryset.filter(
                models.Q(primary_keyword__icontains=content.primary_keyword)
                | models.Q(secondary_keywords__icontains=content.primary_keyword)
            )

        # Slice before materialising so the LIMIT is applied by the database.
        return list(queryset[:self.MAX_POOL_SIZE])

    def _score_candidates(self, content: Content, candidates: List[Content]) -> List[Dict]:
        """Score each candidate against ``content``; drop those scoring zero.

        Returns candidate dicts in the same order as ``candidates``.
        """
        scored = []
        source_keyword = (content.primary_keyword or '').lower()

        for candidate in candidates:
            score = 0

            # Primary keyword containment is checked in both directions, so
            # identical keywords earn the weight twice (as before).
            candidate_keyword = (candidate.primary_keyword or '').lower()
            if source_keyword and candidate_keyword:
                if source_keyword in candidate_keyword:
                    score += self.PRIMARY_KEYWORD_WEIGHT
                if candidate_keyword in source_keyword:
                    score += self.PRIMARY_KEYWORD_WEIGHT

            score += self.SECONDARY_KEYWORD_WEIGHT * self._overlap_count(
                content.secondary_keywords, candidate.secondary_keywords
            )
            score += self.CATEGORY_WEIGHT * self._overlap_count(
                content.categories, candidate.categories
            )
            score += self.TAG_WEIGHT * self._overlap_count(
                content.tags, candidate.tags
            )

            # Recency bonus. Both timestamps must be set: the source's
            # generated_at may be None, which previously raised TypeError.
            if content.generated_at and candidate.generated_at:
                days_old = (content.generated_at - candidate.generated_at).days
                if days_old < self.RECENCY_WINDOW_DAYS:
                    score += self.RECENCY_BONUS

            if score > 0:
                scored.append({
                    'content_id': candidate.id,
                    'title': self._resolve_title(candidate),
                    'url': f"/content/{candidate.id}/",  # Placeholder - actual URL depends on routing
                    'relevance_score': score,
                    'anchor_text': self._generate_anchor_text(candidate, content),
                })

        return scored

    @staticmethod
    def _overlap_count(left, right) -> int:
        """Return how many distinct values the two collections share (0 if either is empty)."""
        if not left or not right:
            return 0
        return len(set(left) & set(right))

    @staticmethod
    def _resolve_title(candidate: Content) -> str:
        """Return the candidate's own title, else its task's title, else 'Untitled'.

        The previous inline expression
        ``title or task.title if task else 'Untitled'`` bound as
        ``(title or task.title) if task else 'Untitled'`` and so discarded a
        perfectly good title whenever the candidate had no task.
        """
        if candidate.title:
            return candidate.title
        if candidate.task and candidate.task.title:
            return candidate.task.title
        return 'Untitled'

    def _generate_anchor_text(self, candidate: Content, source_content: Content) -> str:
        """Pick anchor text for a link to ``candidate``.

        Preference order: primary keyword, title, task title, then the
        generic "Learn more". ``source_content`` is accepted for interface
        compatibility and is not used.
        """
        if candidate.primary_keyword:
            return candidate.primary_keyword
        if candidate.title:
            return candidate.title
        if candidate.task and candidate.task.title:
            return candidate.task.title
        return "Learn more"
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
"""
|
||||
Link Injection Engine
|
||||
Injects internal links into content HTML
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from typing import List, Dict
|
||||
from igny8_core.business.content.models import Content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InjectionEngine:
    """Injects internal links into content HTML.

    Links are only placed in visible text: matches inside tags/attributes,
    HTML comments, or inside existing ``<a>``, ``<script>`` and ``<style>``
    elements are skipped so the markup is never corrupted and links are
    never nested.
    """

    # Any tag or HTML comment; everything between two matches is text.
    _MARKUP_RE = re.compile(r'<!--.*?-->|<[^>]*>', re.DOTALL)
    # Captures the closing slash (if any) and the tag name.
    _TAG_NAME_RE = re.compile(r'<\s*(/?)\s*([A-Za-z][A-Za-z0-9]*)')
    # Elements whose text content must not receive a link.
    _UNLINKABLE_TAGS = frozenset({'a', 'script', 'style'})

    def inject_links(self, content: Content, candidates: List[Dict], max_links: int = 5) -> Dict:
        """
        Inject links into content HTML.

        Candidates are tried in descending ``relevance_score`` order until
        ``max_links`` links have actually been placed. For each candidate the
        first whole-word, case-insensitive occurrence of its anchor text in
        linkable text is wrapped in an ``<a class="internal-link">``; the
        document's own casing of the phrase is kept.

        Args:
            content: Content instance (only ``html_content`` is read; the
                instance is not modified)
            candidates: List of link candidates from CandidateEngine
            max_links: Maximum number of links to inject

        Returns:
            Dict with: {'html_content', 'links', 'links_added'}. Each entry
            of 'links' has 'content_id', 'anchor_text', 'url' and 'position'
            (offset of the match in the HTML at the moment of insertion;
            later insertions earlier in the document are not reflected).
        """
        # Local import: the module-level import block does not provide it.
        from html import escape

        markup = content.html_content
        if not markup or not candidates or max_links <= 0:
            return {
                'html_content': markup,
                'links': [],
                'links_added': 0
            }

        injected = []
        linked_ids = set()  # Never link to the same target twice

        ranked = sorted(candidates, key=lambda item: item.get('relevance_score', 0), reverse=True)

        for candidate in ranked:
            # Cap on links placed, not on candidates tried: a top candidate
            # whose phrase is absent must not consume a slot.
            if len(injected) >= max_links:
                break

            target_id = candidate['content_id']
            if target_id in linked_ids:
                continue

            anchor_text = candidate.get('anchor_text', 'Learn more')
            # An empty pattern would match at offset 0 and insert an empty link.
            if not isinstance(anchor_text, str) or not anchor_text.strip():
                continue

            url = candidate.get('url', f"/content/{target_id}/")

            # Whole-phrase match only, so "art" does not link inside "smart".
            pattern = re.compile(
                r'(?<!\w)' + re.escape(anchor_text) + r'(?!\w)',
                re.IGNORECASE,
            )
            match = self._find_linkable_match(markup, pattern)
            if match is None:
                continue

            link_html = (
                f'<a href="{escape(str(url), quote=True)}" class="internal-link">'
                f'{match.group(0)}</a>'
            )
            markup = markup[:match.start()] + link_html + markup[match.end():]

            injected.append({
                'content_id': target_id,
                'anchor_text': anchor_text,
                'url': url,
                'position': match.start()
            })
            linked_ids.add(target_id)

        return {
            'html_content': markup,
            'links': injected,
            'links_added': len(injected)
        }

    def _find_linkable_match(self, markup: str, pattern):
        """Return the first match of ``pattern`` in linkable text, or None.

        Walks the markup tag by tag, searching only the text runs between
        tags and only while not inside an unlinkable element. Match offsets
        are absolute positions in ``markup``.
        """
        blocked_depth = 0
        cursor = 0

        for tag in self._MARKUP_RE.finditer(markup):
            if blocked_depth == 0 and tag.start() > cursor:
                found = pattern.search(markup, cursor, tag.start())
                if found:
                    return found

            parsed = self._TAG_NAME_RE.match(tag.group())
            if parsed and parsed.group(2).lower() in self._UNLINKABLE_TAGS:
                if parsed.group(1):
                    # Closing tag; clamp so stray closers cannot go negative.
                    blocked_depth = max(0, blocked_depth - 1)
                elif not tag.group().rstrip('>').rstrip().endswith('/'):
                    blocked_depth += 1

            cursor = tag.end()

        # Trailing text after the last tag (or the whole string if tag-free).
        if blocked_depth == 0:
            return pattern.search(markup, cursor)
        return None
|
||||
|
||||
|
||||
101
backend/igny8_core/business/linking/services/linker_service.py
Normal file
101
backend/igny8_core/business/linking/services/linker_service.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
Linker Service
|
||||
Main service for processing content for internal linking
|
||||
"""
|
||||
import logging
|
||||
from typing import List
|
||||
from igny8_core.business.content.models import Content
|
||||
from igny8_core.business.linking.services.candidate_engine import CandidateEngine
|
||||
from igny8_core.business.linking.services.injection_engine import InjectionEngine
|
||||
from igny8_core.business.billing.services.credit_service import CreditService
|
||||
from igny8_core.business.billing.exceptions import InsufficientCreditsError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LinkerService:
    """Service for processing content for internal linking.

    Orchestrates the pipeline: credit check, candidate discovery, link
    injection, persistence of the updated content, and credit deduction.
    """

    def __init__(self):
        self.candidate_engine = CandidateEngine()
        self.injection_engine = InjectionEngine()
        self.credit_service = CreditService()

    def process(self, content_id: int) -> Content:
        """
        Process content for linking.

        Args:
            content_id: Content ID to process

        Returns:
            Updated Content instance. When no candidates are found the
            content is returned unmodified and no credits are deducted.

        Raises:
            ValueError: If no Content row has the given id
            InsufficientCreditsError: If account doesn't have enough credits
        """
        try:
            content = Content.objects.get(id=content_id)
        except Content.DoesNotExist as exc:
            # Chain explicitly so the original lookup failure stays attached.
            raise ValueError(f"Content with id {content_id} does not exist") from exc

        account = content.account

        # Check credits up front; InsufficientCreditsError is left to
        # propagate to the caller (no catch-and-reraise needed).
        self.credit_service.check_credits(account, 'linking')

        # Find link candidates
        candidates = self.candidate_engine.find_candidates(content)
        if not candidates:
            logger.info("No link candidates found for content %s", content_id)
            return content

        # Inject links
        result = self.injection_engine.inject_links(content, candidates)

        # Update content; update_fields limits the write to the columns
        # this service owns.
        content.html_content = result['html_content']
        content.internal_links = result['links']
        content.linker_version += 1
        content.save(update_fields=['html_content', 'internal_links', 'linker_version'])

        # Deduct credits only after the content has been saved.
        self.credit_service.deduct_credits_for_operation(
            account=account,
            operation_type='linking',
            description=f"Internal linking for content: {content.title or 'Untitled'}",
            related_object_type='content',
            related_object_id=content.id
        )

        logger.info("Linked content %s: %s links added", content_id, result['links_added'])

        return content

    def batch_process(self, content_ids: List[int]) -> List[Content]:
        """
        Process multiple content items for linking.

        Best-effort: a failure on one item is logged with its traceback and
        the remaining items are still processed.

        Args:
            content_ids: List of content IDs to process

        Returns:
            List of updated Content instances, in input order, omitting
            items whose processing raised.
        """
        results = []
        for content_id in content_ids:
            try:
                results.append(self.process(content_id))
            except Exception as e:
                # Deliberately broad: one bad item must not abort the batch.
                logger.exception("Error processing content %s: %s", content_id, e)

        return results
|
||||
|
||||
|
||||
Reference in New Issue
Block a user