#!/usr/bin/env python
"""
Import Seed Keywords from Single CSV File

This script imports keywords from a single CSV file into the IGNY8 global keywords database.
Use this for testing before running full import.

Usage:
    cd /data/app/igny8/backend
    python ../KW_DB/management/import_single_csv.py \\
        --csv /data/app/igny8/KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_physical-therapy_matching-terms_2025-12-19_04-25-15.csv \\
        --industry "HealthCare Medical" \\
        --sector "Physiotherapy Rehabilitation" \\
        --dry-run

Author: IGNY8 Team
Date: January 13, 2026
"""

import os
import sys
import csv
import argparse
from pathlib import Path
from types import SimpleNamespace

# Add Django project to path
# Use /app when running in Docker, /data/app/igny8/backend when running locally
backend_path = '/app' if os.path.exists('/app/manage.py') else '/data/app/igny8/backend'
sys.path.insert(0, backend_path)

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')

import django
django.setup()

from django.utils.text import slugify
from django.db import transaction
from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword


class KeywordImporter:
    """Import keywords from CSV into database.

    The importer reads the ``Keyword``, ``Country``, ``Volume`` and
    ``Difficulty`` columns of a CSV file and creates one ``SeedKeyword`` per
    valid row under a single Industry / IndustrySector pair. Counters for the
    run are accumulated in ``self.stats``.

    Args:
        dry_run: When true, nothing is written; would-be actions are logged.
        verbose: When true, per-row progress messages are printed.
    """

    def __init__(self, dry_run=False, verbose=False):
        self.dry_run = dry_run
        self.verbose = verbose
        # Running totals, printed in the summary at the end of import_csv().
        self.stats = {
            'processed': 0,
            'imported': 0,
            'skipped_duplicate': 0,
            'skipped_invalid': 0,
            'errors': 0
        }

    def log(self, message, force=False):
        """Print message if verbose or forced"""
        if self.verbose or force:
            print(message)

    def get_or_create_industry(self, name):
        """Get or create the Industry record whose slug matches ``name``.

        Returns:
            ``(industry, created)``. In dry-run mode no query is made and a
            placeholder object with ``id``, ``name`` and ``slug`` attributes
            is returned together with ``False``.
        """
        slug = slugify(name)

        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Industry: {name} (slug: {slug})")
            # A nested class with ``name = name`` in its body raises NameError:
            # names assigned in a class body are not resolved from the
            # enclosing function scope. SimpleNamespace gives the same
            # attribute access without that trap.
            return SimpleNamespace(id=0, name=name, slug=slug), False

        industry, created = Industry.objects.get_or_create(
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB'
            }
        )

        if created:
            self.log(f"✓ Created Industry: {name}", force=True)
        else:
            self.log(f"✓ Found existing Industry: {name}")

        return industry, created

    def get_or_create_sector(self, industry, name):
        """Get or create the IndustrySector named ``name`` under ``industry``.

        Returns:
            ``(sector, created)``. In dry-run mode no query is made and a
            placeholder object with ``id``, ``name`` and ``slug`` attributes
            is returned together with ``False``.
        """
        slug = slugify(name)

        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Sector: {name} (slug: {slug})")
            # See get_or_create_industry: a class body cannot read ``name`` /
            # ``slug`` from this function, so use a plain namespace object.
            return SimpleNamespace(id=0, name=name, slug=slug), False

        sector, created = IndustrySector.objects.get_or_create(
            industry=industry,
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB'
            }
        )

        if created:
            self.log(f" ✓ Created Sector: {name}", force=True)
        else:
            self.log(f" ✓ Found existing Sector: {name}")

        return sector, created

    def is_duplicate(self, keyword, country, industry, sector):
        """
        Check if keyword already exists with same country in this industry+sector.

        The keyword is compared case-insensitively; country, industry and
        sector must match exactly. Always returns False in dry-run mode, where
        the database is not consulted.
        """
        if self.dry_run:
            return False  # Skip duplicate check in dry run

        return SeedKeyword.objects.filter(
            keyword__iexact=keyword,
            country=country,
            industry=industry,
            sector=sector
        ).exists()

    def import_keyword(self, keyword_data, industry, sector):
        """Import a single parsed keyword record.

        Args:
            keyword_data: Dict with ``keyword``, ``country``, ``volume`` and
                ``difficulty`` keys, as produced by parse_csv_row().
            industry: Industry the keyword belongs to.
            sector: IndustrySector the keyword belongs to.

        Returns:
            True when the keyword was created (or would be, in dry-run mode);
            False when it was skipped as a duplicate.
        """
        keyword = keyword_data['keyword']
        country = keyword_data['country']
        volume = keyword_data['volume']
        difficulty = keyword_data['difficulty']

        # Check for duplicate (keyword + country)
        if self.is_duplicate(keyword, country, industry, sector):
            self.log(f" ⊘ SKIP (duplicate): {keyword} [{country}]")
            self.stats['skipped_duplicate'] += 1
            return False

        if self.dry_run:
            self.log(f" [DRY RUN] Would import: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
            return True

        # Create keyword
        SeedKeyword.objects.create(
            keyword=keyword,
            industry=industry,
            sector=sector,
            volume=volume,
            difficulty=difficulty,
            country=country,
            is_active=True
        )

        self.log(f" ✓ Imported: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
        return True

    @staticmethod
    def _cell(row, column, default=''):
        """Return the stripped text of ``row[column]``, or ``default`` if missing/blank.

        csv.DictReader fills cells missing from a short row with None, so the
        value cannot be assumed to be a string.
        """
        value = row.get(column)
        if value is None:
            return default
        text = str(value).strip()
        return text or default

    @staticmethod
    def _parse_int(text, default=0):
        """Convert ``text`` to int, returning ``default`` when it is not an integer."""
        try:
            return int(text)
        except (ValueError, TypeError):
            return default

    def parse_csv_row(self, row):
        """Parse a CSV row and extract keyword data.

        Args:
            row: Mapping of column name to cell value (a csv.DictReader row).

        Returns:
            Dict with ``keyword``, ``country`` (upper-cased, default ``US``),
            ``volume`` (int, default 0) and ``difficulty`` (int clamped to
            0-100, default 0); or None when the row has no keyword.
        """
        keyword = self._cell(row, 'Keyword')
        if not keyword:
            return None

        # Parse country (default to US)
        country = self._cell(row, 'Country', 'US').upper()

        # Parse volume (default to 0)
        volume = self._parse_int(self._cell(row, 'Volume', '0'))

        # Parse difficulty (default to 0, clamp to 0-100)
        difficulty = self._parse_int(self._cell(row, 'Difficulty', '0'))
        difficulty = max(0, min(100, difficulty))

        return {
            'keyword': keyword,
            'country': country,
            'volume': volume,
            'difficulty': difficulty
        }

    def import_csv(self, csv_path, industry_name, sector_name):
        """Import keywords from a CSV file.

        All rows are processed inside one database transaction, so an error
        part-way through leaves the database unchanged.

        Args:
            csv_path: Path to the CSV file.
            industry_name: Name of the Industry to import into.
            sector_name: Name of the IndustrySector to import into.

        Returns:
            True when the file was processed to the end; False when the file
            does not exist or an error aborted the import.
        """
        csv_path = Path(csv_path)

        if not csv_path.exists():
            print(f"❌ ERROR: CSV file not found: {csv_path}")
            return False

        print(f"\n{'='*60}")
        print(f"IMPORTING FROM: {csv_path.name}")
        print(f"Industry: {industry_name}")
        print(f"Sector: {sector_name}")
        if self.dry_run:
            print("MODE: DRY RUN (no database changes)")
        print(f"{'='*60}\n")

        # Get or create Industry and Sector
        industry, _ = self.get_or_create_industry(industry_name)
        sector, _ = self.get_or_create_sector(industry, sector_name)

        # Read and import CSV
        print("\nProcessing keywords...")

        try:
            with transaction.atomic():
                # utf-8-sig drops a leading BOM (otherwise the first header
                # becomes '\ufeffKeyword' and every row looks invalid);
                # newline='' is what the csv module expects for file objects.
                with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
                    reader = csv.DictReader(f)

                    for row in reader:
                        self.stats['processed'] += 1

                        keyword_data = self.parse_csv_row(row)
                        if not keyword_data:
                            self.stats['skipped_invalid'] += 1
                            continue

                        if self.import_keyword(keyword_data, industry, sector):
                            self.stats['imported'] += 1

                # Rollback in dry run mode
                if self.dry_run:
                    transaction.set_rollback(True)

        except Exception as e:
            # Top-level boundary for the import: report and signal failure.
            print(f"\n❌ ERROR: {e}")
            self.stats['errors'] += 1
            return False

        # Print summary
        print(f"\n{'='*60}")
        print("IMPORT SUMMARY")
        print(f"{'='*60}")
        print(f"Total rows processed: {self.stats['processed']}")
        print(f"✓ Imported: {self.stats['imported']}")
        print(f"⊘ Skipped (dup): {self.stats['skipped_duplicate']}")
        print(f"⊘ Skipped (invalid): {self.stats['skipped_invalid']}")
        print(f"✗ Errors: {self.stats['errors']}")
        print(f"{'='*60}\n")

        if self.dry_run:
            print("ℹ This was a DRY RUN - no data was saved to database")
            print("Remove --dry-run flag to perform actual import\n")
        else:
            print("✓ Import completed successfully!\n")

        return True


def main():
    """Parse command-line arguments, run the import and exit 0 on success, 1 on failure."""
    parser = argparse.ArgumentParser(
        description='Import seed keywords from single CSV file',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Dry run (preview only)
  python import_single_csv.py --csv /path/to/file.csv --industry "HealthCare Medical" --sector "Physiotherapy Rehabilitation" --dry-run

  # Actual import with verbose output
  python import_single_csv.py --csv /path/to/file.csv --industry "HealthCare Medical" --sector "Physiotherapy Rehabilitation" --verbose
        """
    )

    parser.add_argument('--csv', required=True, help='Path to CSV file')
    parser.add_argument('--industry', required=True, help='Industry name')
    parser.add_argument('--sector', required=True, help='Sector name')
    parser.add_argument('--dry-run', action='store_true', help='Preview without saving to database')
    parser.add_argument('--verbose', action='store_true', help='Show detailed progress')

    args = parser.parse_args()

    # Create importer and run
    importer = KeywordImporter(dry_run=args.dry_run, verbose=args.verbose)
    success = importer.import_csv(args.csv, args.industry, args.sector)

    sys.exit(0 if success else 1)


if __name__ == '__main__':
    main()