#!/usr/bin/env python3 """ Import US/CA Technology & IT Services Keywords Targeted import script for the "US CA" folder in Technology_&_IT_Services. Maps folder names to existing DB sectors and imports all CSV files. Usage: docker exec igny8_backend python3 /app/scripts/import_us_ca_tech.py --dry-run docker exec igny8_backend python3 /app/scripts/import_us_ca_tech.py """ import os import sys import csv import argparse sys.path.insert(0, '/app') os.chdir('/app') os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings') import django django.setup() from pathlib import Path from django.db import transaction from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword # Hard-coded mapping: folder name in "US CA" -> sector slug in DB FOLDER_TO_SECTOR_SLUG = { 'Automation & Workflow Systems': 'automation-workflow-systems', 'Cloud Services': 'cloud-services', 'DATA & AI Services': 'data-ai-services', 'Digital Marketing & SEO': 'digital-marketing-seo', 'SAAS': 'saas', 'Web Development & Design': 'web-development-design', } INDUSTRY_ID = 27 # Technology & IT Services BASE_PATH = Path('/data/app/igny8/KW_DB/Technology_&_IT_Services/US CA') def parse_csv_row(row): """Parse a CSV row into keyword data.""" keyword = row.get('Keyword', '').strip() if not keyword: return None country_raw = row.get('Country', 'US').strip().upper() if not country_raw: country_raw = 'US' volume_raw = row.get('Volume', '0').strip() try: volume = int(volume_raw) if volume_raw else 0 except (ValueError, TypeError): volume = 0 difficulty_raw = row.get('Difficulty', '0').strip() try: difficulty = int(difficulty_raw) if difficulty_raw else 0 difficulty = max(0, min(100, difficulty)) except (ValueError, TypeError): difficulty = 0 return { 'keyword': keyword, 'country': country_raw, 'volume': volume, 'difficulty': difficulty, } def import_csv(csv_path, industry, sector, dry_run=False): """Import a single CSV file, returns stats dict.""" stats = {'processed': 0, 'imported': 0, 'skipped_dup': 0, 'skipped_inv': 0, 'errors': 0} try: with open(csv_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: stats['processed'] += 1 data = parse_csv_row(row) if not data: stats['skipped_inv'] += 1 continue # Duplicate check: keyword (case-insensitive) in same industry+sector # DB unique constraint is (keyword, industry_id, sector_id) - no country exists = SeedKeyword.objects.filter( keyword__iexact=data['keyword'], industry=industry, sector=sector, ).exists() if exists: stats['skipped_dup'] += 1 continue if not dry_run: SeedKeyword.objects.create( keyword=data['keyword'], industry=industry, sector=sector, volume=data['volume'], difficulty=data['difficulty'], country=data['country'], is_active=True, ) stats['imported'] += 1 except Exception as e: print(f" āŒ Error: {e}") stats['errors'] += 1 return stats def main(): parser = argparse.ArgumentParser(description='Import US/CA Tech keywords') parser.add_argument('--dry-run', action='store_true', help='Preview without saving') args = parser.parse_args() industry = Industry.objects.get(id=INDUSTRY_ID) print(f"\n{'='*70}") print(f"IMPORT: Technology & IT Services — US CA Keywords") print(f"{'='*70}") print(f"Industry: {industry.name} (id={industry.id})") print(f"Mode: {'DRY RUN' if args.dry_run else 'LIVE IMPORT'}") print(f"{'='*70}\n") totals = {'files': 0, 'processed': 0, 'imported': 0, 'skipped_dup': 0, 'skipped_inv': 0, 'errors': 0} for folder_name, sector_slug in sorted(FOLDER_TO_SECTOR_SLUG.items()): folder_path = BASE_PATH / folder_name if not folder_path.exists(): print(f"⚠ Folder not found: {folder_name}") continue sector = IndustrySector.objects.get(industry=industry, slug=sector_slug) print(f"\nšŸ“‚ Sector: {sector.name} (id={sector.id}, slug={sector.slug})") csv_files = sorted(folder_path.glob('*.csv')) if not csv_files: print(f" ⚠ No CSV files") continue print(f" Found {len(csv_files)} CSV files") with transaction.atomic(): for csv_file in csv_files: totals['files'] += 1 stats = import_csv(csv_file, industry, sector, dry_run=args.dry_run) totals['processed'] += stats['processed'] totals['imported'] += stats['imported'] totals['skipped_dup'] += stats['skipped_dup'] totals['skipped_inv'] += stats['skipped_inv'] totals['errors'] += stats['errors'] print(f" šŸ“„ {csv_file.name}") print(f" rows={stats['processed']} | āœ“ imported={stats['imported']} | ⊘ dup={stats['skipped_dup']} | inv={stats['skipped_inv']}") if args.dry_run: transaction.set_rollback(True) print(f"\n\n{'='*70}") print(f"SUMMARY") print(f"{'='*70}") print(f"Total CSV files: {totals['files']}") print(f"Total rows processed: {totals['processed']}") print(f"āœ“ Imported: {totals['imported']}") print(f"⊘ Skipped (duplicate): {totals['skipped_dup']}") print(f"⊘ Skipped (invalid): {totals['skipped_inv']}") print(f"āœ— Errors: {totals['errors']}") print(f"{'='*70}\n") if args.dry_run: print("ℹ DRY RUN — no data saved. Remove --dry-run to import.\n") else: print("āœ“ Import completed!\n") if __name__ == '__main__': main()