Add new keywords for IT Services; align sectors; partial fix for viewer access
This commit is contained in:
188
backend/scripts/import_us_ca_tech.py
Normal file
188
backend/scripts/import_us_ca_tech.py
Normal file
@@ -0,0 +1,188 @@
|
||||
#!/usr/bin/env python3
"""
Import US/CA Technology & IT Services Keywords

Targeted import script for the "US CA" folder in Technology_&_IT_Services.
Maps folder names to existing DB sectors and imports all CSV files.

Usage:
docker exec igny8_backend python3 /app/scripts/import_us_ca_tech.py --dry-run
docker exec igny8_backend python3 /app/scripts/import_us_ca_tech.py
"""

import os
import sys
import csv
import argparse

# Bootstrap: make the project importable, run from the project root, and
# point Django at the settings module BEFORE django.setup() is called.
sys.path.insert(0, '/app')
os.chdir('/app')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')

import django
django.setup()

# NOTE: model imports must come after django.setup(), otherwise Django
# raises AppRegistryNotReady.
from pathlib import Path
from django.db import transaction
from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword
# Hard-coded mapping: folder name in "US CA" -> sector slug in DB.
# Folders not listed here are never imported; folders listed but missing
# on disk are reported and skipped by main().
FOLDER_TO_SECTOR_SLUG = {
    'Automation & Workflow Systems': 'automation-workflow-systems',
    'Cloud Services': 'cloud-services',
    'DATA & AI Services': 'data-ai-services',
    'Digital Marketing & SEO': 'digital-marketing-seo',
    'SAAS': 'saas',
    'Web Development & Design': 'web-development-design',
}

# Primary key of the target Industry row — presumably stable across
# environments; verify before running against a different DB.
INDUSTRY_ID = 27  # Technology & IT Services

# Root folder holding one sub-folder per sector, each containing CSV exports.
BASE_PATH = Path('/data/app/igny8/KW_DB/Technology_&_IT_Services/US CA')
def parse_csv_row(row):
    """Parse a CSV row into keyword data.

    Args:
        row: a dict produced by ``csv.DictReader``. Cells of rows shorter
            than the header are filled with ``None`` (DictReader's default
            ``restval``), so every field is normalized with ``or ''``
            before string methods are applied — ``row.get(key, default)``
            alone would crash on such rows because the key IS present,
            just mapped to ``None``.

    Returns:
        dict with keys 'keyword', 'country', 'volume', 'difficulty',
        or None when the row has no usable keyword.
    """
    keyword = (row.get('Keyword') or '').strip()
    if not keyword:
        return None

    # Default to 'US' both when the column is absent and when the cell is
    # empty/None.
    country = (row.get('Country') or '').strip().upper() or 'US'

    # Tolerate thousands separators (e.g. "1,200") some keyword-tool
    # exports emit; anything else non-numeric falls back to 0.
    volume_raw = (row.get('Volume') or '').strip().replace(',', '')
    try:
        volume = int(volume_raw) if volume_raw else 0
    except (ValueError, TypeError):
        volume = 0

    difficulty_raw = (row.get('Difficulty') or '').strip().replace(',', '')
    try:
        difficulty = int(difficulty_raw) if difficulty_raw else 0
    except (ValueError, TypeError):
        difficulty = 0
    # Difficulty is a 0-100 score; clamp out-of-range values.
    difficulty = max(0, min(100, difficulty))

    return {
        'keyword': keyword,
        'country': country,
        'volume': volume,
        'difficulty': difficulty,
    }
def import_csv(csv_path, industry, sector, dry_run=False):
    """Import one CSV file of seed keywords for a given industry/sector.

    Args:
        csv_path: path to a CSV file with a 'Keyword' header (optional
            'Country', 'Volume', 'Difficulty' columns).
        industry: Industry model instance the keywords belong to.
        sector: IndustrySector model instance the keywords belong to.
        dry_run: when True, rows are parsed and counted but nothing is
            written to the database.

    Returns:
        Counter dict: processed, imported, skipped_dup, skipped_inv, errors.
    """
    counters = {'processed': 0, 'imported': 0, 'skipped_dup': 0, 'skipped_inv': 0, 'errors': 0}

    try:
        with open(csv_path, 'r', encoding='utf-8') as fh:
            for record in csv.DictReader(fh):
                counters['processed'] += 1

                parsed = parse_csv_row(record)
                if not parsed:
                    counters['skipped_inv'] += 1
                    continue

                # Duplicate check: keyword (case-insensitive) in same industry+sector.
                # DB unique constraint is (keyword, industry_id, sector_id) - no country.
                already_present = SeedKeyword.objects.filter(
                    keyword__iexact=parsed['keyword'],
                    industry=industry,
                    sector=sector,
                ).exists()
                if already_present:
                    counters['skipped_dup'] += 1
                    continue

                if not dry_run:
                    SeedKeyword.objects.create(
                        keyword=parsed['keyword'],
                        industry=industry,
                        sector=sector,
                        volume=parsed['volume'],
                        difficulty=parsed['difficulty'],
                        country=parsed['country'],
                        is_active=True,
                    )
                # Dry runs count the row as "imported" so the preview
                # matches what a live run would do.
                counters['imported'] += 1
    except Exception as e:
        # Best-effort file handling: report the failure, keep the partial
        # counts, and let the caller move on to the next file.
        print(f"  ❌ Error: {e}")
        counters['errors'] += 1

    return counters
def main():
    """CLI entry point: import every mapped US/CA folder into its sector.

    Walks FOLDER_TO_SECTOR_SLUG in sorted order, imports each folder's
    CSV files inside one transaction per sector, and prints a summary.
    With --dry-run each per-sector transaction is rolled back so no data
    persists, while the per-file counts still show what would happen.
    """
    arg_parser = argparse.ArgumentParser(description='Import US/CA Tech keywords')
    arg_parser.add_argument('--dry-run', action='store_true', help='Preview without saving')
    args = arg_parser.parse_args()
    dry_run = args.dry_run

    industry = Industry.objects.get(id=INDUSTRY_ID)

    banner = '=' * 70
    mode_label = 'DRY RUN' if dry_run else 'LIVE IMPORT'
    print(f"\n{banner}")
    print("IMPORT: Technology & IT Services — US CA Keywords")
    print(f"{banner}")
    print(f"Industry: {industry.name} (id={industry.id})")
    print(f"Mode: {mode_label}")
    print(f"{banner}\n")

    totals = {'files': 0, 'processed': 0, 'imported': 0, 'skipped_dup': 0, 'skipped_inv': 0, 'errors': 0}

    for folder, slug in sorted(FOLDER_TO_SECTOR_SLUG.items()):
        folder_path = BASE_PATH / folder
        if not folder_path.exists():
            print(f"⚠ Folder not found: {folder}")
            continue

        # Raises DoesNotExist if the slug mapping is stale — intentional
        # hard stop rather than importing into the wrong sector.
        sector = IndustrySector.objects.get(industry=industry, slug=slug)
        print(f"\n📂 Sector: {sector.name} (id={sector.id}, slug={sector.slug})")

        csv_paths = sorted(folder_path.glob('*.csv'))
        if not csv_paths:
            print(f"  ⚠ No CSV files")
            continue

        print(f"  Found {len(csv_paths)} CSV files")

        # One transaction per sector: a dry run is rolled back at commit
        # time via set_rollback, a live run commits all files together.
        with transaction.atomic():
            for path in csv_paths:
                totals['files'] += 1
                file_stats = import_csv(path, industry, sector, dry_run=dry_run)

                # file_stats keys are a subset of totals' keys.
                for key, value in file_stats.items():
                    totals[key] += value

                print(f"  📄 {path.name}")
                print(f"     rows={file_stats['processed']} | ✓ imported={file_stats['imported']} | ⊘ dup={file_stats['skipped_dup']} | inv={file_stats['skipped_inv']}")

            if dry_run:
                transaction.set_rollback(True)

    print(f"\n\n{banner}")
    print("SUMMARY")
    print(f"{banner}")
    print(f"Total CSV files: {totals['files']}")
    print(f"Total rows processed: {totals['processed']}")
    print(f"✓ Imported: {totals['imported']}")
    print(f"⊘ Skipped (duplicate): {totals['skipped_dup']}")
    print(f"⊘ Skipped (invalid): {totals['skipped_inv']}")
    print(f"✗ Errors: {totals['errors']}")
    print(f"{banner}\n")

    if dry_run:
        print("ℹ DRY RUN — no data saved. Remove --dry-run to import.\n")
    else:
        print("✓ Import completed!\n")
# Script entry point: allows importing this module without side effects.
if __name__ == '__main__':
    main()
Reference in New Issue
Block a user