#!/usr/bin/env python
"""
Import Seed Keywords from Single CSV File

This script imports keywords from a single CSV file into the IGNY8 global keywords database.
Use this for testing before running full import.

Usage:
    cd /data/app/igny8/backend
    python ../KW_DB/management/import_single_csv.py \\
        --csv /data/app/igny8/KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_physical-therapy_matching-terms_2025-12-19_04-25-15.csv \\
        --industry "HealthCare Medical" \\
        --sector "Physiotherapy Rehabilitation" \\
        --dry-run

Author: IGNY8 Team
Date: January 13, 2026
"""

import os
import sys
import csv
import argparse
from pathlib import Path

# Make the Django project importable before anything from igny8_core is loaded.
# The backend lives at /app inside the Docker image (detected by the presence
# of /app/manage.py); otherwise fall back to the local checkout path.
backend_path = '/app' if os.path.exists('/app/manage.py') else '/data/app/igny8/backend'
# Prepend so the project package wins over any same-named package on sys.path.
sys.path.insert(0, backend_path)
# setdefault: a DJANGO_SETTINGS_MODULE already present in the environment is kept.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')

# Initialise Django here; the project model imports below run only after this.
import django
django.setup()

from django.utils.text import slugify
from django.db import transaction
from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword


class KeywordImporter:
    """Import seed keywords from a CSV file into the database.

    The importer resolves (or creates) the target ``Industry`` and
    ``IndustrySector`` records, then creates one ``SeedKeyword`` per valid CSV
    row, skipping rows whose keyword + country already exist for that
    industry/sector.  In dry-run mode no records are queried or created;
    placeholder objects stand in for the industry and sector.

    Attributes:
        dry_run: When true, only report what would be imported.
        verbose: When true, print per-record progress messages.
        stats: Running counters keyed by ``processed``, ``imported``,
            ``skipped_duplicate``, ``skipped_invalid`` and ``errors``.
    """

    def __init__(self, dry_run=False, verbose=False):
        """Store the run options and reset all counters to zero."""
        self.dry_run = dry_run
        self.verbose = verbose
        self.stats = {
            'processed': 0,
            'imported': 0,
            'skipped_duplicate': 0,
            'skipped_invalid': 0,
            'errors': 0,
        }

    def log(self, message, force=False):
        """Print *message* when verbose mode is on or *force* is true."""
        if self.verbose or force:
            print(message)

    @staticmethod
    def _dry_run_record(class_name, name, slug):
        """Return a placeholder object exposing ``id``, ``name`` and ``slug``.

        The class is built with ``type()`` instead of a nested ``class``
        statement on purpose: inside a class body, ``name = name`` resolves
        the right-hand side in the class and global namespaces only (never in
        the enclosing function's locals) and therefore raises ``NameError``.
        """
        return type(class_name, (), {'id': 0, 'name': name, 'slug': slug})()

    @staticmethod
    def _cell_text(row, column):
        """Return the stripped text of *column*; '' when missing or ``None``.

        ``csv.DictReader`` stores ``None`` for cells missing from short rows,
        so a plain ``row.get(column, default)`` does not protect ``.strip()``.
        """
        value = row.get(column)
        return '' if value is None else str(value).strip()

    @staticmethod
    def _to_int(text):
        """Convert *text* to ``int``; empty or non-numeric text yields 0."""
        try:
            return int(text) if text else 0
        except ValueError:
            return 0

    def get_or_create_industry(self, name):
        """Get or create the ``Industry`` whose slug is derived from *name*.

        Returns:
            ``(industry, created)``.  In dry-run mode the database is not
            touched and ``(placeholder, False)`` is returned instead.
        """
        slug = slugify(name)

        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Industry: {name} (slug: {slug})")
            return self._dry_run_record('MockIndustry', name, slug), False

        industry, created = Industry.objects.get_or_create(
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB',
            },
        )

        if created:
            self.log(f"✓ Created Industry: {name}", force=True)
        else:
            self.log(f"✓ Found existing Industry: {name}")

        return industry, created

    def get_or_create_sector(self, industry, name):
        """Get or create the ``IndustrySector`` named *name* under *industry*.

        Returns:
            ``(sector, created)``.  In dry-run mode the database is not
            touched and ``(placeholder, False)`` is returned instead.
        """
        slug = slugify(name)

        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Sector: {name} (slug: {slug})")
            return self._dry_run_record('MockSector', name, slug), False

        sector, created = IndustrySector.objects.get_or_create(
            industry=industry,
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB',
            },
        )

        if created:
            self.log(f"  ✓ Created Sector: {name}", force=True)
        else:
            self.log(f"  ✓ Found existing Sector: {name}")

        return sector, created

    def is_duplicate(self, keyword, country, industry, sector):
        """Return whether this keyword already exists for the industry+sector.

        The keyword is matched case-insensitively (``iexact``); country,
        industry and sector are matched exactly.  Always ``False`` in dry-run
        mode, where the database is not queried.
        """
        if self.dry_run:
            return False

        return SeedKeyword.objects.filter(
            keyword__iexact=keyword,
            country=country,
            industry=industry,
            sector=sector,
        ).exists()

    def import_keyword(self, keyword_data, industry, sector):
        """Create one ``SeedKeyword`` from a parsed row.

        Args:
            keyword_data: Dict with ``keyword``, ``country``, ``volume`` and
                ``difficulty`` keys, as produced by ``parse_csv_row``.
            industry: Industry the keyword belongs to.
            sector: Sector the keyword belongs to.

        Returns:
            ``True`` when the keyword was created (or would be, in dry-run
            mode); ``False`` when it was skipped as a duplicate, in which case
            ``stats['skipped_duplicate']`` is incremented.
        """
        keyword = keyword_data['keyword']
        country = keyword_data['country']
        volume = keyword_data['volume']
        difficulty = keyword_data['difficulty']

        if self.is_duplicate(keyword, country, industry, sector):
            self.log(f"  ⊘ SKIP (duplicate): {keyword} [{country}]")
            self.stats['skipped_duplicate'] += 1
            return False

        if self.dry_run:
            self.log(f"  [DRY RUN] Would import: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
            return True

        SeedKeyword.objects.create(
            keyword=keyword,
            industry=industry,
            sector=sector,
            volume=volume,
            difficulty=difficulty,
            country=country,
            is_active=True,
        )

        self.log(f"  ✓ Imported: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
        return True

    def parse_csv_row(self, row):
        """Extract keyword data from one CSV row.

        Reads the ``Keyword``, ``Country``, ``Volume`` and ``Difficulty``
        columns.  Missing, empty or ``None`` cells fall back to defaults:
        country ``'US'``, volume ``0``, difficulty ``0``.  Country is
        upper-cased; difficulty is clamped to 0-100; non-numeric volume or
        difficulty becomes ``0``.

        Returns:
            A dict with ``keyword``, ``country``, ``volume`` and
            ``difficulty``, or ``None`` when the row has no keyword or is not
            a mapping.
        """
        try:
            keyword = self._cell_text(row, 'Keyword')
            if not keyword:
                return None

            country = self._cell_text(row, 'Country').upper() or 'US'
            volume = self._to_int(self._cell_text(row, 'Volume'))
            difficulty = self._to_int(self._cell_text(row, 'Difficulty'))
        except AttributeError as e:
            # Only reachable when *row* does not offer a mapping-style .get().
            self.log(f"  ⚠ Error parsing row: {e}")
            return None

        return {
            'keyword': keyword,
            'country': country,
            'volume': volume,
            'difficulty': max(0, min(100, difficulty)),
        }

    def _print_banner(self, csv_path, industry_name, sector_name):
        """Print the header block announcing what is about to be imported."""
        print(f"\n{'='*60}")
        print(f"IMPORTING FROM: {csv_path.name}")
        print(f"Industry: {industry_name}")
        print(f"Sector: {sector_name}")
        if self.dry_run:
            print("MODE: DRY RUN (no database changes)")
        print(f"{'='*60}\n")

    def _print_summary(self):
        """Print the final counters and a closing status line."""
        print(f"\n{'='*60}")
        print("IMPORT SUMMARY")
        print(f"{'='*60}")
        print(f"Total rows processed: {self.stats['processed']}")
        print(f"✓ Imported:          {self.stats['imported']}")
        print(f"⊘ Skipped (dup):     {self.stats['skipped_duplicate']}")
        print(f"⊘ Skipped (invalid): {self.stats['skipped_invalid']}")
        print(f"✗ Errors:            {self.stats['errors']}")
        print(f"{'='*60}\n")

        if self.dry_run:
            print("ℹ This was a DRY RUN - no data was saved to database")
            print("Remove --dry-run flag to perform actual import\n")
        else:
            print("✓ Import completed successfully!\n")

    def import_csv(self, csv_path, industry_name, sector_name):
        """Import every keyword row of *csv_path* into one industry/sector.

        The industry, the sector and all keywords are written inside a single
        transaction, so a failure part-way through leaves nothing behind.

        Args:
            csv_path: Path to a CSV file with a header row.
            industry_name: Name of the industry to import into.
            sector_name: Name of the sector to import into.

        Returns:
            ``True`` when the file was processed to the end, ``False`` when
            the file does not exist or an error aborted the import.
        """
        csv_path = Path(csv_path)

        if not csv_path.exists():
            print(f"❌ ERROR: CSV file not found: {csv_path}")
            return False

        self._print_banner(csv_path, industry_name, sector_name)

        try:
            with transaction.atomic():
                industry, _ = self.get_or_create_industry(industry_name)
                sector, _ = self.get_or_create_sector(industry, sector_name)

                print("\nProcessing keywords...")

                # utf-8-sig drops a leading BOM, which would otherwise become
                # part of the first header name; newline='' is what the csv
                # module expects so quoted embedded newlines parse correctly.
                with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
                    for row in csv.DictReader(f):
                        self.stats['processed'] += 1

                        keyword_data = self.parse_csv_row(row)
                        if not keyword_data:
                            self.stats['skipped_invalid'] += 1
                            continue

                        if self.import_keyword(keyword_data, industry, sector):
                            self.stats['imported'] += 1

                # Belt and braces: a dry run never commits anything.
                if self.dry_run:
                    transaction.set_rollback(True)

        except Exception as e:
            # Top-level boundary of the script: report the failure and signal
            # it through the return value; the transaction is rolled back.
            print(f"\n❌ ERROR: {e}")
            self.stats['errors'] += 1
            return False

        self._print_summary()
        return True


def main():
    """Parse command-line options, run a single CSV import, and exit.

    The process exit status is 0 when the import reports success and 1
    otherwise.
    """
    cli = argparse.ArgumentParser(
        description='Import seed keywords from single CSV file',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Dry run (preview only)
  python import_single_csv.py --csv /path/to/file.csv --industry "HealthCare Medical" --sector "Physiotherapy Rehabilitation" --dry-run
  
  # Actual import with verbose output
  python import_single_csv.py --csv /path/to/file.csv --industry "HealthCare Medical" --sector "Physiotherapy Rehabilitation" --verbose
        """
    )

    # Mandatory value options, registered in the order --help lists them.
    for flag, help_text in (
        ('--csv', 'Path to CSV file'),
        ('--industry', 'Industry name'),
        ('--sector', 'Sector name'),
    ):
        cli.add_argument(flag, required=True, help=help_text)

    # Optional on/off switches.
    for flag, help_text in (
        ('--dry-run', 'Preview without saving to database'),
        ('--verbose', 'Show detailed progress'),
    ):
        cli.add_argument(flag, action='store_true', help=help_text)

    opts = cli.parse_args()

    # Hand the parsed options to the importer and map its result to a status.
    runner = KeywordImporter(dry_run=opts.dry_run, verbose=opts.verbose)
    if runner.import_csv(opts.csv, opts.industry, opts.sector):
        sys.exit(0)
    sys.exit(1)


# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()