KW_DB & Management of KW

This commit is contained in:
IGNY8 VPS (Salman)
2026-01-13 12:00:16 +00:00
parent d2b733640c
commit 95e316cde2
37 changed files with 9224 additions and 0 deletions

View File

@@ -0,0 +1,321 @@
#!/usr/bin/env python3
"""
Import Seed Keywords from Single CSV File
This script imports keywords from a single CSV file into the IGNY8 global keywords database.
Use this for testing before running full import.
DUPLICATE HANDLING:
- Checks: keyword + country (case-insensitive)
- If duplicate exists in same industry+sector: SKIPS import
Usage:
docker compose -f docker-compose.app.yml exec igny8_backend \\
python3 /app/scripts/import_seed_keywords_single.py \\
--csv /app/../KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_physical-therapy_matching-terms_2025-12-19_04-25-15.csv \\
--industry "HealthCare Medical" \\
--sector "Physiotherapy Rehabilitation" \\
--dry-run
Author: IGNY8 Team
Date: January 13, 2026
"""
import os
import sys
import csv
import argparse
import django
from pathlib import Path
# Put /app first on the import path and make it the working directory before
# Django is configured (presumably /app is where the igny8_core package lives
# inside the backend container - confirm against the deployment layout).
sys.path.insert(0, '/app')
os.chdir('/app')
# Point Django at the project settings, unless DJANGO_SETTINGS_MODULE is
# already set in the environment, then initialise Django for standalone use.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')
django.setup()
# These imports are deliberately placed after django.setup(): the model
# classes cannot be imported until Django's app registry has been loaded.
from django.utils.text import slugify
from django.db import transaction
from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword
class KeywordImporter:
    """Import seed keywords from a single CSV file into the database.

    The CSV must contain a ``Keyword`` column. ``Country``, ``Volume`` and
    ``Difficulty`` are optional and fall back to ``US``, ``0`` and ``0``.

    A row is skipped as a duplicate when a ``SeedKeyword`` with the same
    keyword (case-insensitive) and the same country already exists for the
    target industry and sector.

    In dry-run mode the lookup/creation methods return before issuing any ORM
    query, so nothing is written, duplicates are not detected, and every valid
    row is counted as imported in the summary.
    """

    class _DryRunRecord:
        """Stand-in for an Industry / IndustrySector row during a dry run."""

        def __init__(self, name, slug):
            self.id = 0
            self.name = name
            self.slug = slug

    def __init__(self, dry_run=False, verbose=False):
        """Configure the importer.

        Args:
            dry_run: When true, preview the import without saving anything.
            verbose: When true, print per-record progress messages.
        """
        self.dry_run = dry_run
        self.verbose = verbose
        # Running counters reported in the summary printed by import_csv().
        self.stats = {
            'processed': 0,
            'imported': 0,
            'skipped_duplicate': 0,
            'skipped_invalid': 0,
            'errors': 0,
        }

    def log(self, message, force=False):
        """Print *message* if verbose mode is on or *force* is true."""
        if self.verbose or force:
            print(message)

    def get_or_create_industry(self, name):
        """Return ``(industry, created)`` for the industry named *name*.

        The record is looked up by the slug derived from *name*. In dry-run
        mode a placeholder object (``id`` 0) is returned and ``created`` is
        always ``False``.
        """
        slug = slugify(name)
        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Industry: {name} (slug: {slug})")
            return self._DryRunRecord(name, slug), False
        industry, created = Industry.objects.get_or_create(
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB',
            },
        )
        if created:
            self.log(f"✓ Created Industry: {name}", force=True)
        else:
            self.log(f"✓ Found existing Industry: {name}")
        return industry, created

    def get_or_create_sector(self, industry, name):
        """Return ``(sector, created)`` for sector *name* within *industry*.

        The record is looked up by industry plus the slug derived from *name*.
        In dry-run mode a placeholder object (``id`` 0) is returned and
        ``created`` is always ``False``.
        """
        slug = slugify(name)
        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Sector: {name} (slug: {slug})")
            return self._DryRunRecord(name, slug), False
        sector, created = IndustrySector.objects.get_or_create(
            industry=industry,
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB',
            },
        )
        if created:
            self.log(f" ✓ Created Sector: {name}", force=True)
        else:
            self.log(f" ✓ Found existing Sector: {name}")
        return sector, created

    def is_duplicate(self, keyword, country, industry, sector):
        """Return True if this keyword+country already exists in the sector.

        The keyword is compared case-insensitively; country, industry and
        sector must match exactly. Always returns False in dry-run mode,
        because the dry-run industry/sector are placeholders that cannot be
        used in a query.
        """
        if self.dry_run:
            return False  # Skip duplicate check in dry run
        return SeedKeyword.objects.filter(
            keyword__iexact=keyword,
            country=country,
            industry=industry,
            sector=sector,
        ).exists()

    def import_keyword(self, keyword_data, industry, sector):
        """Create one ``SeedKeyword`` from a parsed row.

        Args:
            keyword_data: Dict produced by :meth:`parse_csv_row`.
            industry: Industry the keyword belongs to.
            sector: Sector the keyword belongs to.

        Returns:
            True if the keyword was imported (or would be, in dry-run mode);
            False if it was skipped as a duplicate.
        """
        keyword = keyword_data['keyword']
        country = keyword_data['country']
        volume = keyword_data['volume']
        difficulty = keyword_data['difficulty']
        # Check for duplicate (keyword + country)
        if self.is_duplicate(keyword, country, industry, sector):
            self.log(f" ⊘ SKIP (duplicate): {keyword} [{country}]")
            self.stats['skipped_duplicate'] += 1
            return False
        if self.dry_run:
            self.log(f" [DRY RUN] Would import: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
            return True
        SeedKeyword.objects.create(
            keyword=keyword,
            industry=industry,
            sector=sector,
            volume=volume,
            difficulty=difficulty,
            country=country,
            is_active=True,
        )
        self.log(f" ✓ Imported: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
        return True

    @staticmethod
    def _cell(row, column):
        """Return the stripped text of *column* in *row*, or '' if absent.

        ``csv.DictReader`` fills the missing trailing cells of a short row
        with ``None``, so ``None`` has to be handled as well as a missing key.
        """
        value = row.get(column)
        return '' if value is None else str(value).strip()

    @staticmethod
    def _parse_int(text, default=0):
        """Convert cell text to ``int``, returning *default* if unusable.

        Accepts plain integers and float-formatted numbers such as ``"45.0"``
        (truncated toward zero). Blank, non-numeric and non-finite values
        yield *default*.
        """
        if not text:
            return default
        try:
            return int(text)
        except ValueError:
            pass
        try:
            return int(float(text))
        except (ValueError, OverflowError):
            # ValueError: not a number, or NaN; OverflowError: infinity.
            return default

    def parse_csv_row(self, row):
        """Extract keyword data from one CSV row.

        Args:
            row: Mapping of column name to cell text, as produced by
                ``csv.DictReader``.

        Returns:
            A dict with ``keyword``, ``country`` (upper-cased, default
            ``US``), ``volume`` (default 0) and ``difficulty`` (default 0,
            clamped to 0-100), or ``None`` if the row has no keyword.
        """
        keyword = self._cell(row, 'Keyword')
        if not keyword:
            return None
        country = self._cell(row, 'Country').upper() or 'US'
        volume = self._parse_int(self._cell(row, 'Volume'))
        difficulty = self._parse_int(self._cell(row, 'Difficulty'))
        difficulty = max(0, min(100, difficulty))  # Clamp to 0-100
        return {
            'keyword': keyword,
            'country': country,
            'volume': volume,
            'difficulty': difficulty,
        }

    def import_csv(self, csv_path, industry_name, sector_name):
        """Import every keyword in *csv_path* under the given industry/sector.

        The industry, the sector and all keywords are written in a single
        transaction, so a failure part-way through leaves nothing behind.

        Args:
            csv_path: Path to the CSV file.
            industry_name: Industry name (looked up or created by slug).
            sector_name: Sector name (looked up or created by slug).

        Returns:
            True if the file was processed; False if the file is missing,
            has no ``Keyword`` column, or an error aborted the import.
        """
        csv_path = Path(csv_path)
        if not csv_path.exists():
            print(f"❌ ERROR: CSV file not found: {csv_path}")
            return False
        rule = '=' * 70
        print(f"\n{rule}")
        print("IMPORTING SEED KEYWORDS FROM CSV")
        print(rule)
        print(f"File: {csv_path.name}")
        print(f"Industry: {industry_name}")
        print(f"Sector: {sector_name}")
        if self.dry_run:
            print("Mode: DRY RUN (no database changes)")
        print(f"{rule}\n")
        try:
            # utf-8-sig drops a leading byte-order mark, which would otherwise
            # be glued onto the first header name; newline='' is what the csv
            # module expects so that quoted embedded newlines parse correctly.
            with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
                reader = csv.DictReader(f)
                columns = reader.fieldnames or []
                if 'Keyword' not in columns:
                    # Without this check every row would be counted as
                    # invalid and the run would still report success.
                    found = ', '.join(str(c) for c in columns) or 'none'
                    print(f"❌ ERROR: CSV has no 'Keyword' column (found: {found})")
                    self.stats['errors'] += 1
                    return False
                with transaction.atomic():
                    # Created inside the transaction so a failed import does
                    # not leave an orphaned industry or sector behind.
                    industry, _ = self.get_or_create_industry(industry_name)
                    sector, _ = self.get_or_create_sector(industry, sector_name)
                    print("Processing keywords...\n")
                    for row in reader:
                        self.stats['processed'] += 1
                        keyword_data = self.parse_csv_row(row)
                        if not keyword_data:
                            self.stats['skipped_invalid'] += 1
                            continue
                        if self.import_keyword(keyword_data, industry, sector):
                            self.stats['imported'] += 1
                    # Rollback in dry run mode
                    if self.dry_run:
                        transaction.set_rollback(True)
        except Exception as e:
            # Top-level boundary for the import: report, count and fail.
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()
            self.stats['errors'] += 1
            return False
        # Print summary
        print(f"\n{rule}")
        print("IMPORT SUMMARY")
        print(rule)
        print(f"Total rows processed: {self.stats['processed']}")
        print(f"✓ Imported: {self.stats['imported']}")
        print(f"⊘ Skipped (duplicate): {self.stats['skipped_duplicate']}")
        print(f"⊘ Skipped (invalid): {self.stats['skipped_invalid']}")
        print(f"✗ Errors: {self.stats['errors']}")
        print(f"{rule}\n")
        if self.dry_run:
            print(" This was a DRY RUN - no data was saved to database")
            print("Remove --dry-run flag to perform actual import\n")
        else:
            print("✓ Import completed successfully!")
            print("✓ Check Django admin: /admin/auth/seedkeyword/\n")
        return True
def main():
    """Command-line entry point: parse the flags, run one import, then exit.

    The process exits with status 0 when the importer reports success and
    with status 1 otherwise.
    """
    usage_examples = """
Examples:
# Dry run (preview only)
docker compose -f docker-compose.app.yml exec igny8_backend \\
python3 /app/scripts/import_seed_keywords_single.py \\
--csv /app/../KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_muscle-stimulator_matching-terms_2025-12-19_04-25-32.csv \\
--industry "HealthCare Medical" \\
--sector "Physiotherapy Rehabilitation" \\
--dry-run --verbose
# Actual import
docker compose -f docker-compose.app.yml exec igny8_backend \\
python3 /app/scripts/import_seed_keywords_single.py \\
--csv /app/../KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_muscle-stimulator_matching-terms_2025-12-19_04-25-32.csv \\
--industry "HealthCare Medical" \\
--sector "Physiotherapy Rehabilitation"
"""
    cli = argparse.ArgumentParser(
        description='Import seed keywords from single CSV file (with duplicate check: keyword+country)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Mandatory value-taking options, registered in the order shown by --help.
    required_options = (
        ('--csv', 'Path to CSV file'),
        ('--industry', 'Industry name'),
        ('--sector', 'Sector name'),
    )
    for flag, description in required_options:
        cli.add_argument(flag, required=True, help=description)

    # Optional boolean switches.
    switches = (
        ('--dry-run', 'Preview without saving to database'),
        ('--verbose', 'Show detailed progress'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    options = cli.parse_args()

    # Create importer and run
    runner = KeywordImporter(dry_run=options.dry_run, verbose=options.verbose)
    succeeded = runner.import_csv(options.csv, options.industry, options.sector)
    sys.exit(0 if succeeded else 1)


# Run the CLI only when the file is executed as a script.
if __name__ == '__main__':
    main()