This commit is contained in:
alorig
2025-11-18 07:13:34 +05:00
parent 51c3986e01
commit 2074191eee
17 changed files with 2578 additions and 0 deletions

View File

@@ -97,5 +97,237 @@ class LinkerService:
continue
return results
def process_product(self, content_id: int) -> Content:
    """
    Process product content for linking (Phase 8).

    Enhanced linking for products: links to related products, categories,
    and service pages. The steps are: load the product, check that the
    owning account has credits for a 'linking' operation, collect
    product-specific candidates, inject the links, save the content, and
    only then deduct the credits.

    Args:
        content_id: Content ID to process (must be entity_type='product')

    Returns:
        Updated Content instance. If no link candidates are found, the
        content is returned unmodified and no credits are deducted.

    Raises:
        ValueError: If no product content with this ID exists.
        InsufficientCreditsError: Propagated unchanged from the credit check.
    """
    try:
        content = Content.objects.get(id=content_id, entity_type='product')
    except Content.DoesNotExist as exc:
        # Chain explicitly so the original lookup failure stays attached
        # as the cause of the ValueError callers see.
        raise ValueError(
            f"Product content with id {content_id} does not exist"
        ) from exc

    account = content.account

    # Check credits before doing any work. InsufficientCreditsError is left
    # to propagate; wrapping the call in try/except just to re-raise it
    # added nothing.
    self.credit_service.check_credits(account, 'linking')

    # Find product-specific link candidates (related products, categories, services)
    candidates = self._find_product_candidates(content)
    if not candidates:
        logger.info(f"No link candidates found for product content {content_id}")
        return content

    # Inject links
    result = self.injection_engine.inject_links(content, candidates)

    # Update content; only the fields touched by the linker are written.
    content.html_content = result['html_content']
    content.internal_links = result['links']
    content.linker_version += 1
    content.save(update_fields=['html_content', 'internal_links', 'linker_version'])

    # Deduct credits only after the linked content has been persisted.
    self.credit_service.deduct_credits_for_operation(
        account=account,
        operation_type='linking',
        description=f"Product linking for: {content.title or 'Untitled'}",
        related_object_type='content',
        related_object_id=content.id
    )

    logger.info(f"Linked product content {content_id}: {result['links_added']} links added")
    return content
def process_taxonomy(self, content_id: int) -> Content:
    """
    Process taxonomy content for linking (Phase 8).

    Enhanced linking for taxonomies: links to related categories, tags,
    and content. The steps are: load the taxonomy content, check that the
    owning account has credits for a 'linking' operation, collect
    taxonomy-specific candidates, inject the links, save the content, and
    only then deduct the credits.

    Args:
        content_id: Content ID to process (must be entity_type='taxonomy')

    Returns:
        Updated Content instance. If no link candidates are found, the
        content is returned unmodified and no credits are deducted.

    Raises:
        ValueError: If no taxonomy content with this ID exists.
        InsufficientCreditsError: Propagated unchanged from the credit check.
    """
    try:
        content = Content.objects.get(id=content_id, entity_type='taxonomy')
    except Content.DoesNotExist as exc:
        # Chain explicitly so the original lookup failure stays attached
        # as the cause of the ValueError callers see.
        raise ValueError(
            f"Taxonomy content with id {content_id} does not exist"
        ) from exc

    account = content.account

    # Check credits before doing any work. InsufficientCreditsError is left
    # to propagate; wrapping the call in try/except just to re-raise it
    # added nothing.
    self.credit_service.check_credits(account, 'linking')

    # Find taxonomy-specific link candidates (related taxonomies, categories, content)
    candidates = self._find_taxonomy_candidates(content)
    if not candidates:
        logger.info(f"No link candidates found for taxonomy content {content_id}")
        return content

    # Inject links
    result = self.injection_engine.inject_links(content, candidates)

    # Update content; only the fields touched by the linker are written.
    content.html_content = result['html_content']
    content.internal_links = result['links']
    content.linker_version += 1
    content.save(update_fields=['html_content', 'internal_links', 'linker_version'])

    # Deduct credits only after the linked content has been persisted.
    self.credit_service.deduct_credits_for_operation(
        account=account,
        operation_type='linking',
        description=f"Taxonomy linking for: {content.title or 'Untitled'}",
        related_object_type='content',
        related_object_id=content.id
    )

    logger.info(f"Linked taxonomy content {content_id}: {result['links_added']} links added")
    return content
def _find_product_candidates(self, content: Content) -> List[dict]:
    """
    Find link candidates specific to product content.

    Candidates are gathered in priority order:

    1. Up to 5 other products in the same account/site/sector, narrowed to
       the same ``structure_data['product_type']`` when one is set.
    2. Up to 3 service pages in the same account/site/sector.
    3. Up to 5 candidates from the base candidate engine.

    A target is only listed once (first occurrence wins), and the content
    itself is never listed as its own link target.

    Args:
        content: Product Content instance

    Returns:
        List of candidate dicts
    """
    candidates = []
    # Seeded with the content's own id so a self-link coming from the base
    # engine is dropped, mirroring the .exclude(id=content.id) below.
    seen_ids = {content.id}

    def add_candidate(candidate):
        # NOTE(review): base-engine candidates are assumed to be dicts that
        # carry 'content_id' like the ones built here - confirm. Entries
        # without that key are kept as-is rather than guessed at.
        target_id = candidate.get('content_id') if isinstance(candidate, dict) else None
        if target_id is not None:
            if target_id in seen_ids:
                return
            seen_ids.add(target_id)
        candidates.append(candidate)

    # Find related products (same category, similar features)
    related_products = Content.objects.filter(
        account=content.account,
        site=content.site,
        sector=content.sector,
        entity_type='product',
        status__in=['draft', 'review', 'publish']
    ).exclude(id=content.id)

    # Use structure_data to find products with similar categories/features.
    # structure_data is only queried when it is actually a dict; any other
    # JSON value (e.g. a list) has no 'product_type' to read.
    structure_data = content.structure_data
    if isinstance(structure_data, dict):
        product_type = structure_data.get('product_type')
        if product_type:
            related_products = related_products.filter(
                structure_data__product_type=product_type
            )

    # Add product candidates
    for product in related_products[:5]:  # Limit to 5 related products
        add_candidate({
            'content_id': product.id,
            'title': product.title or 'Untitled Product',
            'url': f'/products/{product.id}',  # Placeholder URL
            'relevance_score': 0.8,
            'anchor_text': product.title or 'Related Product'
        })

    # Find related service pages
    related_services = Content.objects.filter(
        account=content.account,
        site=content.site,
        sector=content.sector,
        entity_type='service',
        status__in=['draft', 'review', 'publish']
    )[:3]  # Limit to 3 related services

    for service in related_services:
        add_candidate({
            'content_id': service.id,
            'title': service.title or 'Untitled Service',
            'url': f'/services/{service.id}',  # Placeholder URL
            'relevance_score': 0.6,
            'anchor_text': service.title or 'Related Service'
        })

    # Use base candidate engine for additional candidates; anything already
    # collected above is skipped so the same target is not offered twice.
    for base_candidate in self.candidate_engine.find_candidates(content, max_candidates=5):
        add_candidate(base_candidate)

    return candidates
def _find_taxonomy_candidates(self, content: Content) -> List[dict]:
    """
    Find link candidates specific to taxonomy content.

    Candidates are gathered in priority order:

    1. Up to 5 other taxonomy contents in the same account/site/sector.
    2. For each 'categories' block in ``content.json_blocks``, the first 3
       category items are read; for every item with a non-empty name, up
       to 3 contents whose ``categories`` contain that name are added.
    3. Up to 5 candidates from the base candidate engine.

    A target is only listed once (first occurrence wins), even when it
    matches several categories or was already found as a related taxonomy.
    The content itself is never listed as its own link target.

    Args:
        content: Taxonomy Content instance

    Returns:
        List of candidate dicts
    """
    candidates = []
    # Seeded with the content's own id so a self-link coming from the base
    # engine is dropped, mirroring the .exclude(id=content.id) below.
    seen_ids = {content.id}

    def add_candidate(candidate):
        # NOTE(review): base-engine candidates are assumed to be dicts that
        # carry 'content_id' like the ones built here - confirm. Entries
        # without that key are kept as-is rather than guessed at.
        target_id = candidate.get('content_id') if isinstance(candidate, dict) else None
        if target_id is not None:
            if target_id in seen_ids:
                return
            seen_ids.add(target_id)
        candidates.append(candidate)

    # Find related taxonomies
    related_taxonomies = Content.objects.filter(
        account=content.account,
        site=content.site,
        sector=content.sector,
        entity_type='taxonomy',
        status__in=['draft', 'review', 'publish']
    ).exclude(id=content.id)[:5]  # Limit to 5 related taxonomies

    for taxonomy in related_taxonomies:
        add_candidate({
            'content_id': taxonomy.id,
            'title': taxonomy.title or 'Untitled Taxonomy',
            'url': f'/taxonomy/{taxonomy.id}',  # Placeholder URL
            'relevance_score': 0.7,
            'anchor_text': taxonomy.title or 'Related Taxonomy'
        })

    # Find content in this taxonomy (using json_blocks categories/tags).
    # json_blocks is free-form JSON, so malformed entries are skipped
    # instead of raising halfway through candidate collection.
    json_blocks = content.json_blocks
    if not isinstance(json_blocks, (list, tuple)):
        json_blocks = []

    for block in json_blocks:
        if not isinstance(block, dict) or block.get('type') != 'categories':
            continue

        categories = block.get('items') or []
        if not isinstance(categories, (list, tuple)):
            continue

        for category in categories[:3]:  # Limit to 3 categories
            if not isinstance(category, dict):
                continue
            category_name = category.get('name', '')
            if not category_name:
                continue

            # This query is not restricted by entity_type, so it may return
            # items already collected above; add_candidate filters them.
            related_content = Content.objects.filter(
                account=content.account,
                site=content.site,
                sector=content.sector,
                categories__icontains=category_name,
                status__in=['draft', 'review', 'publish']
            ).exclude(id=content.id)[:3]

            for related in related_content:
                add_candidate({
                    'content_id': related.id,
                    'title': related.title or 'Untitled',
                    'url': f'/content/{related.id}',  # Placeholder URL
                    'relevance_score': 0.6,
                    'anchor_text': related.title or 'Related Content'
                })

    # Use base candidate engine for additional candidates; anything already
    # collected above is skipped so the same target is not offered twice.
    for base_candidate in self.candidate_engine.find_candidates(content, max_candidates=5):
        add_candidate(base_candidate)

    return candidates