Files
igny8/backend/scripts/import_seed_keywords_single.py
IGNY8 VPS (Salman) 35d60247ad import new kw
2026-02-25 17:26:03 +00:00

327 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Import Seed Keywords from Single CSV File
This script imports keywords from a single CSV file into the IGNY8 global keywords database.
Use this for testing before running full import.
DUPLICATE HANDLING:
- Checks: keyword + country (case-insensitive)
- If duplicate exists in same industry+sector: SKIPS import
Usage:
docker compose -f docker-compose.app.yml exec igny8_backend \\
python3 /app/scripts/import_seed_keywords_single.py \\
--csv /app/../KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_physical-therapy_matching-terms_2025-12-19_04-25-15.csv \\
--industry "HealthCare Medical" \\
--sector "Physiotherapy Rehabilitation" \\
--dry-run
Author: IGNY8 Team
Date: January 13, 2026
"""
import os
import sys
import csv
import argparse
import django
from pathlib import Path
# Put /app first on the import path and make it the working directory so the
# project package can be imported (presumably igny8_core lives under /app —
# confirm against the container layout).
sys.path.insert(0, '/app')
os.chdir('/app')
# Configure and initialise Django. setdefault() leaves an already-exported
# DJANGO_SETTINGS_MODULE untouched, so the environment can override it.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')
django.setup()
# These imports deliberately come after django.setup(): importing model
# modules before the app registry is loaded raises AppRegistryNotReady.
from django.utils.text import slugify
from django.db import transaction
from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword
class KeywordImporter:
    """Import seed keywords from a CSV file into the database.

    Rows are read with ``csv.DictReader`` and use the ``Keyword``,
    ``Country``, ``Volume`` and ``Difficulty`` columns. A keyword is skipped
    when a ``SeedKeyword`` with the same keyword (case-insensitive) and the
    same country already exists for the target industry and sector.

    Attributes:
        dry_run: When true, nothing is written to the database.
        verbose: When true, per-row progress messages are printed.
        stats: Counters reported in the summary printed by ``import_csv``.
    """

    def __init__(self, dry_run=False, verbose=False):
        """Configure the importer.

        Args:
            dry_run: Log what would happen without writing to the database.
            verbose: Print detailed per-row progress.
        """
        self.dry_run = dry_run
        self.verbose = verbose
        self.stats = {
            'processed': 0,
            'imported': 0,
            'skipped_duplicate': 0,
            'skipped_invalid': 0,
            'errors': 0,
        }

    def log(self, message, force=False):
        """Print ``message`` when verbose mode is on or ``force`` is true."""
        if self.verbose or force:
            print(message)

    @staticmethod
    def _mock_record(name, slug):
        """Return a stand-in (id 0) for a model row, used in dry-run mode."""
        from types import SimpleNamespace

        return SimpleNamespace(id=0, name=name, slug=slug)

    @staticmethod
    def _parse_int(raw, default=0):
        """Return ``raw`` as an int, or ``default`` if missing/blank/invalid.

        ``csv.DictReader`` yields ``None`` for fields missing from a short
        row, so ``None`` is treated the same as an empty cell.
        """
        text = '' if raw is None else str(raw).strip()
        if not text:
            return default
        try:
            return int(text)
        except ValueError:
            return default

    def get_or_create_industry(self, name):
        """Get or create the ``Industry`` whose slug is ``slugify(name)``.

        Returns:
            An ``(industry, created)`` tuple. In dry-run mode the industry is
            a mock object with ``id``, ``name`` and ``slug`` and ``created``
            is always ``False``.
        """
        slug = slugify(name)
        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Industry: {name} (slug: {slug})")
            return self._mock_record(name, slug), False
        industry, created = Industry.objects.get_or_create(
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB',
            },
        )
        if created:
            self.log(f"✓ Created Industry: {name}", force=True)
        else:
            self.log(f"✓ Found existing Industry: {name}")
        return industry, created

    def get_or_create_sector(self, industry, name):
        """Get or create the ``IndustrySector`` for ``industry`` and ``name``.

        Returns:
            A ``(sector, created)`` tuple. In dry-run mode the sector is a
            mock object and ``created`` is always ``False``.
        """
        slug = slugify(name)
        if self.dry_run:
            self.log(f"[DRY RUN] Would get/create Sector: {name} (slug: {slug})")
            return self._mock_record(name, slug), False
        sector, created = IndustrySector.objects.get_or_create(
            industry=industry,
            slug=slug,
            defaults={
                'name': name,
                'is_active': True,
                'description': 'Auto-imported from KW_DB',
            },
        )
        if created:
            self.log(f" ✓ Created Sector: {name}", force=True)
        else:
            self.log(f" ✓ Found existing Sector: {name}")
        return sector, created

    def is_duplicate(self, keyword, country, industry, sector):
        """Return whether the keyword already exists for this target.

        The match is on keyword (case-insensitive) plus country, industry and
        sector. In dry-run mode the check is skipped and ``False`` is
        returned, because the industry and sector are mock objects.
        """
        if self.dry_run:
            return False
        return SeedKeyword.objects.filter(
            keyword__iexact=keyword,
            country=country,
            industry=industry,
            sector=sector,
        ).exists()

    def import_keyword(self, keyword_data, industry, sector):
        """Import a single parsed keyword record.

        Args:
            keyword_data: Dict as returned by ``parse_csv_row``.
            industry: Target industry.
            sector: Target sector.

        Returns:
            ``True`` if the keyword was created (or would be, in dry-run
            mode); ``False`` if it was skipped as a duplicate or failed.
        """
        keyword = keyword_data['keyword']
        country = keyword_data['country']
        volume = keyword_data['volume']
        difficulty = keyword_data['difficulty']

        if self.is_duplicate(keyword, country, industry, sector):
            self.log(f" ⊘ SKIP (duplicate): {keyword} [{country}]")
            self.stats['skipped_duplicate'] += 1
            return False

        if self.dry_run:
            self.log(f" [DRY RUN] Would import: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
            return True

        try:
            # The nested atomic() is a savepoint: a failed INSERT rolls back
            # only this row. Without it, catching a database error inside the
            # caller's atomic block leaves the transaction unusable and every
            # later query raises TransactionManagementError.
            with transaction.atomic():
                SeedKeyword.objects.create(
                    keyword=keyword,
                    industry=industry,
                    sector=sector,
                    volume=volume,
                    difficulty=difficulty,
                    country=country,
                    is_active=True,
                )
        except Exception as e:
            # Deliberately broad: one bad row must not stop the import.
            self.log(f" ⚠ Error creating: {keyword} [{country}]: {e}", force=True)
            self.stats['errors'] += 1
            return False

        self.log(f" ✓ Imported: {keyword} [{country}] (vol:{volume}, diff:{difficulty})")
        return True

    def parse_csv_row(self, row):
        """Extract keyword data from one ``csv.DictReader`` row.

        Missing or blank cells fall back to defaults: country ``'US'``,
        volume ``0`` and difficulty ``0``. Difficulty is clamped to 0-100.

        Returns:
            A dict with ``keyword``, ``country``, ``volume`` and
            ``difficulty``, or ``None`` when the row has no keyword or cannot
            be parsed.
        """
        try:
            keyword = (row.get('Keyword') or '').strip()
            if not keyword:
                return None
            # `or ''` guards against None cells produced by short rows.
            country = (row.get('Country') or '').strip().upper() or 'US'
            volume = self._parse_int(row.get('Volume'))
            difficulty = max(0, min(100, self._parse_int(row.get('Difficulty'))))
            return {
                'keyword': keyword,
                'country': country,
                'volume': volume,
                'difficulty': difficulty,
            }
        except Exception as e:
            self.log(f" ⚠ Error parsing row: {e}")
            return None

    def import_csv(self, csv_path, industry_name, sector_name):
        """Import every keyword row of a CSV file and print a summary.

        All rows are processed inside one transaction, which is rolled back
        in dry-run mode or when an unexpected error aborts the import.

        Args:
            csv_path: Path to the CSV file.
            industry_name: Name of the industry to import into.
            sector_name: Name of the sector to import into.

        Returns:
            ``False`` if the file is missing or the import aborted, ``True``
            otherwise. Row-level errors are counted in ``stats['errors']``
            but do not change the return value.
        """
        csv_path = Path(csv_path)
        if not csv_path.exists():
            print(f"❌ ERROR: CSV file not found: {csv_path}")
            return False

        print(f"\n{'='*70}")
        print("IMPORTING SEED KEYWORDS FROM CSV")
        print(f"{'='*70}")
        print(f"File: {csv_path.name}")
        print(f"Industry: {industry_name}")
        print(f"Sector: {sector_name}")
        if self.dry_run:
            print("Mode: DRY RUN (no database changes)")
        print(f"{'='*70}\n")

        industry, _ = self.get_or_create_industry(industry_name)
        sector, _ = self.get_or_create_sector(industry, sector_name)

        print("Processing keywords...\n")
        try:
            with transaction.atomic():
                # utf-8-sig strips a leading BOM so the first header is not
                # read as '\ufeffKeyword'; newline='' is what the csv module
                # requires to handle quoted embedded newlines correctly.
                with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
                    for row in csv.DictReader(f):
                        self.stats['processed'] += 1
                        keyword_data = self.parse_csv_row(row)
                        if not keyword_data:
                            self.stats['skipped_invalid'] += 1
                            continue
                        if self.import_keyword(keyword_data, industry, sector):
                            self.stats['imported'] += 1
                # Rollback in dry run mode
                if self.dry_run:
                    transaction.set_rollback(True)
        except Exception as e:
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()
            self.stats['errors'] += 1
            return False

        print(f"\n{'='*70}")
        print("IMPORT SUMMARY")
        print(f"{'='*70}")
        print(f"Total rows processed: {self.stats['processed']}")
        print(f"✓ Imported: {self.stats['imported']}")
        print(f"⊘ Skipped (duplicate): {self.stats['skipped_duplicate']}")
        print(f"⊘ Skipped (invalid): {self.stats['skipped_invalid']}")
        print(f"✗ Errors: {self.stats['errors']}")
        print(f"{'='*70}\n")
        if self.dry_run:
            print(" This was a DRY RUN - no data was saved to database")
            print("Remove --dry-run flag to perform actual import\n")
        elif self.stats['errors']:
            # Do not claim success when some rows failed to import.
            print(f"⚠ Import completed with {self.stats['errors']} error(s) - see messages above")
            print("✓ Check Django admin: /admin/auth/seedkeyword/\n")
        else:
            print("✓ Import completed successfully!")
            print("✓ Check Django admin: /admin/auth/seedkeyword/\n")
        return True
def main():
    """Parse the command line, run the import and exit with its status.

    The process exits with status 0 when ``KeywordImporter.import_csv``
    reports success and with status 1 otherwise.
    """
    usage_examples = """
Examples:
# Dry run (preview only)
docker compose -f docker-compose.app.yml exec igny8_backend \\
python3 /app/scripts/import_seed_keywords_single.py \\
--csv /app/../KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_muscle-stimulator_matching-terms_2025-12-19_04-25-32.csv \\
--industry "HealthCare Medical" \\
--sector "Physiotherapy Rehabilitation" \\
--dry-run --verbose
# Actual import
docker compose -f docker-compose.app.yml exec igny8_backend \\
python3 /app/scripts/import_seed_keywords_single.py \\
--csv /app/../KW_DB/HealthCare_Medical/Physiotherapy_Rehabilitation/google_us_muscle-stimulator_matching-terms_2025-12-19_04-25-32.csv \\
--industry "HealthCare Medical" \\
--sector "Physiotherapy Rehabilitation"
"""
    cli = argparse.ArgumentParser(
        description='Import seed keywords from single CSV file (with duplicate check: keyword+country)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Options that take a value and must be supplied.
    required_options = (
        ('--csv', 'Path to CSV file'),
        ('--industry', 'Industry name'),
        ('--sector', 'Sector name'),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    # Boolean switches that default to off.
    switches = (
        ('--dry-run', 'Preview without saving to database'),
        ('--verbose', 'Show detailed progress'),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, action='store_true', help=help_text)

    opts = cli.parse_args()

    runner = KeywordImporter(dry_run=opts.dry_run, verbose=opts.verbose)
    if runner.import_csv(opts.csv, opts.industry, opts.sector):
        sys.exit(0)
    sys.exit(1)
# Run the importer only when this file is executed as a script, so that
# importing the module does not trigger an import run.
if __name__ == '__main__':
    main()