Initial commit: igny8 project

This commit is contained in:
igny8
2025-11-09 10:27:02 +00:00
commit 60b8188111
27265 changed files with 4360521 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
"""
AI Function implementations
"""

View File

@@ -0,0 +1,345 @@
"""
Auto Cluster Keywords AI Function
"""
import logging
from typing import Dict, List, Any
from django.db import transaction
from igny8_core.ai.base import BaseAIFunction
from igny8_core.modules.planner.models import Keywords, Clusters
from igny8_core.modules.system.utils import get_prompt_value
logger = logging.getLogger(__name__)
class AutoClusterFunction(BaseAIFunction):
    """AI function that groups related keywords into semantic clusters."""

    def get_name(self) -> str:
        """Return the registry identifier for this AI function."""
        return 'auto_cluster'

    def get_metadata(self) -> Dict:
        """Describe the function for display, including per-phase status text."""
        phase_messages = {
            'INIT': 'Initializing clustering...',
            'PREP': 'Loading keywords...',
            'AI_CALL': 'Analyzing keyword relationships...',
            'PARSE': 'Parsing cluster data...',
            'SAVE': 'Creating clusters...',
            'DONE': 'Clustering complete!'
        }
        return {
            'display_name': 'Auto Cluster Keywords',
            'description': 'Group related keywords into semantic clusters using AI',
            'phases': phase_messages
        }

    def get_max_items(self) -> int:
        """Return the maximum number of keywords accepted per clustering run."""
        return 20
def validate(self, payload: dict, account=None) -> Dict:
"""Custom validation for clustering with plan limit checks"""
result = super().validate(payload, account)
if not result['valid']:
return result
# Additional validation: check keywords exist
ids = payload.get('ids', [])
queryset = Keywords.objects.filter(id__in=ids)
if account:
queryset = queryset.filter(account=account)
if queryset.count() == 0:
return {'valid': False, 'error': 'No keywords found'}
# Plan limit validation
if account:
plan = getattr(account, 'plan', None)
if plan:
from django.utils import timezone
from igny8_core.modules.planner.models import Clusters
# Check daily cluster limit
now = timezone.now()
start_of_day = now.replace(hour=0, minute=0, second=0, microsecond=0)
clusters_today = Clusters.objects.filter(
account=account,
created_at__gte=start_of_day
).count()
if plan.daily_cluster_limit and clusters_today >= plan.daily_cluster_limit:
return {
'valid': False,
'error': f'Daily cluster limit reached ({plan.daily_cluster_limit} clusters per day). Please try again tomorrow.'
}
# Check max clusters limit
total_clusters = Clusters.objects.filter(account=account).count()
if plan.max_clusters and total_clusters >= plan.max_clusters:
return {
'valid': False,
'error': f'Maximum cluster limit reached ({plan.max_clusters} clusters). Please upgrade your plan or delete existing clusters.'
}
else:
return {'valid': False, 'error': 'Account does not have an active plan'}
return {'valid': True}
def prepare(self, payload: dict, account=None) -> Dict:
"""Load keywords with relationships"""
ids = payload.get('ids', [])
sector_id = payload.get('sector_id')
queryset = Keywords.objects.filter(id__in=ids)
if account:
queryset = queryset.filter(account=account)
if sector_id:
queryset = queryset.filter(sector_id=sector_id)
keywords = list(queryset.select_related('account', 'site', 'site__account', 'sector', 'sector__site'))
if not keywords:
raise ValueError("No keywords found")
# Store original keyword objects for later use
return {
'keywords': keywords,
'keyword_data': [
{
'id': kw.id,
'keyword': kw.keyword,
'volume': kw.volume,
'difficulty': kw.difficulty,
'intent': kw.intent,
}
for kw in keywords
],
'sector_id': sector_id
}
def build_prompt(self, data: Dict, account=None) -> str:
"""Build clustering prompt"""
keyword_data = data['keyword_data']
sector_id = data.get('sector_id')
# Get prompt template
prompt_template = get_prompt_value(account, 'clustering')
# Format keywords
keywords_text = '\n'.join([
f"- {kw['keyword']} (Volume: {kw['volume']}, Difficulty: {kw['difficulty']}, Intent: {kw['intent']})"
for kw in keyword_data
])
prompt = prompt_template.replace('[IGNY8_KEYWORDS]', keywords_text)
# Add sector context if available
if sector_id:
try:
from igny8_core.auth.models import Sector
sector = Sector.objects.get(id=sector_id)
if sector:
prompt += f"\n\nNote: These keywords are for the '{sector.name}' sector."
except Exception:
pass
# IMPORTANT: When using JSON mode, OpenAI requires explicit JSON instruction
# The prompt template already includes "Format the output as a JSON object"
# but we need to ensure it's explicit for JSON mode compliance
# Check if prompt already explicitly requests JSON (case-insensitive)
prompt_lower = prompt.lower()
has_json_request = (
'json' in prompt_lower and
('format' in prompt_lower or 'respond' in prompt_lower or 'return' in prompt_lower or 'output' in prompt_lower)
)
if not has_json_request:
prompt += "\n\nIMPORTANT: You must respond with valid JSON only. The response must be a JSON object with a 'clusters' array."
return prompt
def parse_response(self, response: str, step_tracker=None) -> List[Dict]:
"""Parse AI response into cluster data"""
import json
from igny8_core.ai.processor import AIProcessor
if not response or not response.strip():
error_msg = "Empty response from AI"
logger.error(f"parse_response: {error_msg}")
raise ValueError(error_msg)
# Try direct JSON parse first (most common case with JSON mode)
json_data = None
try:
json_data = json.loads(response.strip())
except json.JSONDecodeError as e:
logger.warning(f"parse_response: Direct JSON parse failed: {e}, trying extract_json method")
# Fall back to extract_json method which handles markdown code blocks
processor = AIProcessor()
json_data = processor.extract_json(response)
if not json_data:
error_msg = f"Failed to parse clustering response. Response: {response[:200]}..."
logger.error(f"parse_response: {error_msg}")
raise ValueError(error_msg)
# Extract clusters array
clusters = []
if isinstance(json_data, dict):
if 'clusters' in json_data:
clusters = json_data.get('clusters', [])
else:
# Try to find clusters in any key
for key, value in json_data.items():
if isinstance(value, list) and len(value) > 0:
if isinstance(value[0], dict) and ('name' in value[0] or 'keywords' in value[0]):
clusters = value
break
elif isinstance(json_data, list):
clusters = json_data
if not clusters:
error_msg = f"No clusters found in AI response. JSON data: {json_data}"
logger.error(f"parse_response: {error_msg}")
raise ValueError(error_msg)
# Step tracking is handled by the engine - don't add steps here
return clusters
    def save_output(
        self,
        parsed: List[Dict],
        original_data: Dict,
        account=None,
        progress_tracker=None,
        step_tracker=None
    ) -> Dict:
        """Persist parsed clusters, assign matched keywords, refresh metrics.

        Args:
            parsed: Cluster dicts from parse_response (``name``, ``keywords``,
                optional ``description``).
            original_data: Output of prepare(); supplies the keyword model
                instances and the optional sector id.
            account: Owning account; falls back to the first keyword's account.
            progress_tracker: Optional tracker updated during the save phase.
            step_tracker: Unused here.

        Returns:
            Dict with ``count``/``clusters_created`` (new clusters) and
            ``keywords_updated`` (keywords assigned to a cluster).

        Raises:
            ValueError: If no keywords are available or no account can be
                resolved.
        """
        keywords = original_data['keywords']
        sector_id = original_data.get('sector_id')
        if not keywords:
            raise ValueError("No keywords available for saving")
        # Get context from first keyword (account/site/sector already validated at page level)
        first_keyword = keywords[0]
        account = account or first_keyword.account
        site = first_keyword.site
        # Resolve the sector: prefer the first keyword's, else look it up by id.
        from igny8_core.auth.models import Sector
        sector = first_keyword.sector
        if not sector and sector_id:
            try:
                sector = Sector.objects.get(id=sector_id)
            except Sector.DoesNotExist:
                sector = None
        if not account:
            raise ValueError("Account is required for cluster creation")
        clusters_created = 0
        keywords_updated = 0
        with transaction.atomic():
            for idx, cluster_data in enumerate(parsed):
                if progress_tracker:
                    # Map save progress onto the 80-95% band of the overall run.
                    progress = 80 + int((idx / len(parsed)) * 15)
                    progress_tracker.update(
                        "SAVE",
                        progress,
                        f"Creating cluster {idx + 1}/{len(parsed)}...",
                        current=idx + 1,
                        total=len(parsed),
                        current_item=cluster_data.get('name', '')
                    )
                cluster_name = cluster_data.get('name', '')
                cluster_keywords = cluster_data.get('keywords', [])
                # Skip malformed cluster entries with no name or no keywords.
                if not cluster_name or not cluster_keywords:
                    continue
                # Get or create cluster
                if sector:
                    cluster, created = Clusters.objects.get_or_create(
                        name=cluster_name,
                        account=account,
                        site=site,
                        sector=sector,
                        defaults={
                            'description': cluster_data.get('description', ''),
                            'status': 'active',
                        }
                    )
                else:
                    # NOTE: 'sector__isnull=True' is a lookup, so Django drops it
                    # from the create() params; defaults supply the explicit
                    # sector=None for the create path.
                    cluster, created = Clusters.objects.get_or_create(
                        name=cluster_name,
                        account=account,
                        site=site,
                        sector__isnull=True,
                        defaults={
                            'description': cluster_data.get('description', ''),
                            'status': 'active',
                            'sector': None,
                        }
                    )
                if created:
                    clusters_created += 1
                # Match and assign keywords (case-insensitive)
                cluster_keywords_normalized = {kw.strip().lower(): kw.strip() for kw in cluster_keywords}
                available_keywords_normalized = {
                    kw_obj.keyword.strip().lower(): kw_obj
                    for kw_obj in keywords
                }
                matched_keyword_objects = []
                for cluster_kw_normalized, cluster_kw_original in cluster_keywords_normalized.items():
                    if cluster_kw_normalized in available_keywords_normalized:
                        matched_keyword_objects.append(available_keywords_normalized[cluster_kw_normalized])
                # Update matched keywords, re-filtered by account/sector so a
                # keyword outside the current scope is never reassigned.
                if matched_keyword_objects:
                    matched_ids = [kw.id for kw in matched_keyword_objects]
                    keyword_filter = Keywords.objects.filter(
                        id__in=matched_ids,
                        account=account
                    )
                    if sector:
                        keyword_filter = keyword_filter.filter(sector=sector)
                    else:
                        keyword_filter = keyword_filter.filter(sector__isnull=True)
                    updated_count = keyword_filter.update(
                        cluster=cluster,
                        status='mapped'
                    )
                    keywords_updated += updated_count
            # Recalculate metrics for ALL clusters in this account/sector scope,
            # not only the ones touched above.
            from django.db.models import Sum, Case, When, F, IntegerField
            cluster_filter = Clusters.objects.filter(account=account)
            if sector:
                cluster_filter = cluster_filter.filter(sector=sector)
            else:
                cluster_filter = cluster_filter.filter(sector__isnull=True)
            # NOTE(review): this loop reuses the name 'cluster' from the loop
            # above; harmless here, but worth renaming if the method grows.
            for cluster in cluster_filter:
                cluster.keywords_count = Keywords.objects.filter(cluster=cluster).count()
                # Volume calculation: use volume_override if available, otherwise seed_keyword__volume
                volume_sum = Keywords.objects.filter(cluster=cluster).aggregate(
                    total=Sum(
                        Case(
                            When(volume_override__isnull=False, then=F('volume_override')),
                            default=F('seed_keyword__volume'),
                            output_field=IntegerField()
                        )
                    )
                )['total']
                cluster.volume = volume_sum or 0
                cluster.save()
        return {
            'count': clusters_created,
            'clusters_created': clusters_created,
            'keywords_updated': keywords_updated
        }