COMPLETED KEYWORDS-LIBRARY-REDESIGN-PLAN.md

This commit is contained in:
IGNY8 VPS (Salman)
2026-01-18 22:05:38 +00:00
parent 05bc433c80
commit 328098a48c
6 changed files with 264 additions and 93 deletions

View File

@@ -864,6 +864,7 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
industry_id = self.request.query_params.get('industry_id')
industry_name = self.request.query_params.get('industry_name')
sector_id = self.request.query_params.get('sector_id')
sector_ids = self.request.query_params.get('sector_ids') # Comma-separated list
sector_name = self.request.query_params.get('sector_name')
difficulty_min = self.request.query_params.get('difficulty_min')
difficulty_max = self.request.query_params.get('difficulty_max')
@@ -871,13 +872,24 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
volume_max = self.request.query_params.get('volume_max')
site_id = self.request.query_params.get('site_id')
available_only = self.request.query_params.get('available_only')
min_words = self.request.query_params.get('min_words')
if industry_id:
queryset = queryset.filter(industry_id=industry_id)
if industry_name:
queryset = queryset.filter(industry__name__icontains=industry_name)
# Support single sector_id OR multiple sector_ids (comma-separated)
if sector_id:
queryset = queryset.filter(sector_id=sector_id)
elif sector_ids:
try:
ids_list = [int(s.strip()) for s in sector_ids.split(',') if s.strip()]
if ids_list:
queryset = queryset.filter(sector_id__in=ids_list)
except (ValueError, TypeError):
pass
if sector_name:
queryset = queryset.filter(sector__name__icontains=sector_name)
@@ -905,6 +917,20 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
except (ValueError, TypeError):
pass
# Word count filtering (for long-tail keywords - 4+ words)
if min_words is not None:
try:
min_word_count = int(min_words)
if min_word_count == 4:
# Long-tail: 4+ words (keywords with at least 3 spaces)
queryset = queryset.filter(keyword__regex=r'^(\S+\s+){3,}\S+$')
elif min_word_count > 1:
# Generic word count filter using regex
pattern = r'^(\S+\s+){' + str(min_word_count - 1) + r',}\S+$'
queryset = queryset.filter(keyword__regex=pattern)
except (ValueError, TypeError):
pass
# Availability filter - exclude keywords already added to the site
if available_only and str(available_only).lower() in ['true', '1', 'yes']:
if site_id:
@@ -1106,6 +1132,8 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
- premium_traffic: Volume >= 50K with fallbacks (50K -> 25K -> 10K)
- long_tail: 4+ words with Volume > threshold (1K -> 500 -> 200)
- quick_wins: Difficulty <= 20, Volume > threshold, AND available
sector_ids: Comma-separated list of IndustrySector IDs to filter by (for site-specific filtering)
"""
from django.db.models import Count, Sum, Q, F
from django.db.models.functions import Length
@@ -1114,6 +1142,7 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
# Get filters
industry_id = request.query_params.get('industry_id')
sector_id = request.query_params.get('sector_id')
sector_ids = request.query_params.get('sector_ids') # Comma-separated list
site_id = request.query_params.get('site_id')
if not industry_id:
@@ -1149,15 +1178,16 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
return qs.count()
return qs.exclude(id__in=already_added_ids).count()
# Helper for dynamic threshold fallback - returns both total and available
def get_count_with_fallback(qs, thresholds, volume_field='volume'):
    """Count keywords at the first threshold that yields any results.

    Thresholds are tried in the order given; each one is applied to ``qs``
    as a ``<volume_field>__gte`` filter, and the first threshold that
    matches at least one row wins.

    Args:
        qs: Queryset-like object supporting ``.filter(**kwargs)`` and
            ``.count()``.
        thresholds: Non-empty sequence of minimum values to try, in order.
            An empty sequence raises ``IndexError`` on the fallback return.
        volume_field: Name of the field compared against each threshold.

    Returns:
        A dict with keys ``'count'`` (rows at or above the chosen
        threshold), ``'available'`` (``count_available`` applied to those
        same rows) and ``'threshold'`` (the threshold used). When no
        threshold matches, both counts are 0 and ``'threshold'`` is the
        last value tried.
    """
    for threshold in thresholds:
        filtered = qs.filter(**{f'{volume_field}__gte': threshold})
        # Named ``matched`` rather than ``total_count`` so it cannot be
        # confused with the enclosing scope's overall total.
        matched = filtered.count()
        if matched > 0:
            return {
                'count': matched,
                'available': count_available(filtered),
                'threshold': threshold,
            }
    # Nothing matched at any threshold: report zeros against the last
    # (least strict, by caller convention) threshold.
    return {'count': 0, 'available': 0, 'threshold': thresholds[-1]}
# 1. Total keywords
total_count = base_qs.count()
@@ -1166,10 +1196,14 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
available_count = count_available(base_qs)
# 3. High Volume (>= 10K) - simple threshold
high_volume_count = base_qs.filter(volume__gte=10000).count()
high_volume_qs = base_qs.filter(volume__gte=10000)
high_volume_count = high_volume_qs.count()
high_volume_available = count_available(high_volume_qs)
# 3b. Mid Volume (5K-10K)
mid_volume_count = base_qs.filter(volume__gte=5000, volume__lt=10000).count()
mid_volume_qs = base_qs.filter(volume__gte=5000, volume__lt=10000)
mid_volume_count = mid_volume_qs.count()
mid_volume_available = count_available(mid_volume_qs)
# 4. Premium Traffic with dynamic fallback (50K -> 25K -> 10K)
premium_thresholds = [50000, 25000, 10000]
@@ -1199,8 +1233,8 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
'stats': {
'total': {'count': total_count},
'available': {'count': available_count},
'high_volume': {'count': high_volume_count, 'threshold': 10000},
'mid_volume': {'count': mid_volume_count, 'threshold': 5000},
'high_volume': {'count': high_volume_count, 'available': high_volume_available, 'threshold': 10000},
'mid_volume': {'count': mid_volume_count, 'available': mid_volume_available, 'threshold': 5000},
'premium_traffic': premium_result,
'long_tail': long_tail_result,
'quick_wins': quick_wins_result,
@@ -1208,7 +1242,16 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
}
else:
# Get stats per sector in the industry
# Filter by specific sector_ids if provided (for site-specific sectors)
sectors = IndustrySector.objects.filter(industry_id=industry_id)
if sector_ids:
try:
ids_list = [int(s.strip()) for s in sector_ids.split(',') if s.strip()]
if ids_list:
sectors = sectors.filter(id__in=ids_list)
except (ValueError, TypeError):
pass
sectors_data = []
for sector in sectors:
@@ -1219,8 +1262,17 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
continue
sector_available = count_available(sector_qs)
sector_high_volume = sector_qs.filter(volume__gte=10000).count()
sector_mid_volume = sector_qs.filter(volume__gte=5000, volume__lt=10000).count()
# High volume with available count
sector_high_volume_qs = sector_qs.filter(volume__gte=10000)
sector_high_volume = sector_high_volume_qs.count()
sector_high_volume_available = count_available(sector_high_volume_qs)
# Mid volume with available count
sector_mid_volume_qs = sector_qs.filter(volume__gte=5000, volume__lt=10000)
sector_mid_volume = sector_mid_volume_qs.count()
sector_mid_volume_available = count_available(sector_mid_volume_qs)
sector_premium = get_count_with_fallback(sector_qs, premium_thresholds)
sector_long_tail_base = sector_qs.filter(keyword__regex=r'^(\S+\s+){3,}\S+$')
@@ -1237,8 +1289,8 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
'stats': {
'total': {'count': sector_total},
'available': {'count': sector_available},
'high_volume': {'count': sector_high_volume, 'threshold': 10000},
'mid_volume': {'count': sector_mid_volume, 'threshold': 5000},
'high_volume': {'count': sector_high_volume, 'available': sector_high_volume_available, 'threshold': 10000},
'mid_volume': {'count': sector_mid_volume, 'available': sector_mid_volume_available, 'threshold': 5000},
'premium_traffic': sector_premium,
'long_tail': sector_long_tail,
'quick_wins': sector_quick_wins,
@@ -1266,7 +1318,8 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
Returns industries, sectors (filtered by industry), and available filter values.
Supports cascading options based on current filters.
"""
from django.db.models import Count, Min, Max, Q
from django.db.models import Count, Min, Max, Q, Value
from django.db.models.functions import Length, Replace
try:
industry_id = request.query_params.get('industry_id')
@@ -1277,6 +1330,9 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
volume_min = request.query_params.get('volume_min')
volume_max = request.query_params.get('volume_max')
search_term = request.query_params.get('search')
min_words = request.query_params.get('min_words')
site_id = request.query_params.get('site_id')
available_only = request.query_params.get('available_only') == 'true'
# Get industries with keyword counts
industries = Industry.objects.annotate(
@@ -1312,6 +1368,32 @@ class SeedKeywordViewSet(viewsets.ReadOnlyModelViewSet):
base_qs = base_qs.filter(industry_id=industry_id)
if sector_id:
base_qs = base_qs.filter(sector_id=sector_id)
# Apply min_words filter (for long-tail keywords)
if min_words is not None:
try:
min_words_int = int(min_words)
from django.db.models.functions import Length
# Count words by counting spaces + 1
base_qs = base_qs.annotate(
word_count=Length('keyword') - Length(Replace('keyword', Value(' '), Value(''))) + 1
).filter(word_count__gte=min_words_int)
except (ValueError, TypeError):
pass
# Apply available_only filter (exclude keywords already added to site)
if available_only and site_id:
try:
from igny8_core.business.planning.models import Keywords
site_id_int = int(site_id)
# Get seed keyword IDs already added to this site
existing_seed_ids = Keywords.objects.filter(
site_id=site_id_int,
seed_keyword__isnull=False
).values_list('seed_keyword_id', flat=True)
base_qs = base_qs.exclude(id__in=existing_seed_ids)
except (ValueError, TypeError):
pass
# Countries options - apply all filters except country itself
countries_qs = base_qs