new kw for it services & sectors alignment & viewer access partial fixed

This commit is contained in:
IGNY8 VPS (Salman)
2026-02-20 23:29:51 +00:00
parent 2011e48145
commit 341f7c5bc7
110 changed files with 4768 additions and 36 deletions

View File

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
"""
Import US/CA Technology & IT Services Keywords
Targeted import script for the "US CA" folder in Technology_&_IT_Services.
Maps folder names to existing DB sectors and imports all CSV files.
Usage:
docker exec igny8_backend python3 /app/scripts/import_us_ca_tech.py --dry-run
docker exec igny8_backend python3 /app/scripts/import_us_ca_tech.py
"""
import os
import sys
import csv
import argparse
sys.path.insert(0, '/app')
os.chdir('/app')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'igny8_core.settings')
import django
django.setup()
from pathlib import Path
from django.db import transaction
from igny8_core.auth.models import Industry, IndustrySector, SeedKeyword
# Hard-coded mapping: folder name in "US CA" -> sector slug in DB
FOLDER_TO_SECTOR_SLUG = {
'Automation & Workflow Systems': 'automation-workflow-systems',
'Cloud Services': 'cloud-services',
'DATA & AI Services': 'data-ai-services',
'Digital Marketing & SEO': 'digital-marketing-seo',
'SAAS': 'saas',
'Web Development & Design': 'web-development-design',
}
INDUSTRY_ID = 27 # Technology & IT Services
BASE_PATH = Path('/data/app/igny8/KW_DB/Technology_&_IT_Services/US CA')
def parse_csv_row(row):
"""Parse a CSV row into keyword data."""
keyword = row.get('Keyword', '').strip()
if not keyword:
return None
country_raw = row.get('Country', 'US').strip().upper()
if not country_raw:
country_raw = 'US'
volume_raw = row.get('Volume', '0').strip()
try:
volume = int(volume_raw) if volume_raw else 0
except (ValueError, TypeError):
volume = 0
difficulty_raw = row.get('Difficulty', '0').strip()
try:
difficulty = int(difficulty_raw) if difficulty_raw else 0
difficulty = max(0, min(100, difficulty))
except (ValueError, TypeError):
difficulty = 0
return {
'keyword': keyword,
'country': country_raw,
'volume': volume,
'difficulty': difficulty,
}
def import_csv(csv_path, industry, sector, dry_run=False):
"""Import a single CSV file, returns stats dict."""
stats = {'processed': 0, 'imported': 0, 'skipped_dup': 0, 'skipped_inv': 0, 'errors': 0}
try:
with open(csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
stats['processed'] += 1
data = parse_csv_row(row)
if not data:
stats['skipped_inv'] += 1
continue
# Duplicate check: keyword (case-insensitive) in same industry+sector
# DB unique constraint is (keyword, industry_id, sector_id) - no country
exists = SeedKeyword.objects.filter(
keyword__iexact=data['keyword'],
industry=industry,
sector=sector,
).exists()
if exists:
stats['skipped_dup'] += 1
continue
if not dry_run:
SeedKeyword.objects.create(
keyword=data['keyword'],
industry=industry,
sector=sector,
volume=data['volume'],
difficulty=data['difficulty'],
country=data['country'],
is_active=True,
)
stats['imported'] += 1
except Exception as e:
print(f" ❌ Error: {e}")
stats['errors'] += 1
return stats
def main():
parser = argparse.ArgumentParser(description='Import US/CA Tech keywords')
parser.add_argument('--dry-run', action='store_true', help='Preview without saving')
args = parser.parse_args()
industry = Industry.objects.get(id=INDUSTRY_ID)
print(f"\n{'='*70}")
print(f"IMPORT: Technology & IT Services — US CA Keywords")
print(f"{'='*70}")
print(f"Industry: {industry.name} (id={industry.id})")
print(f"Mode: {'DRY RUN' if args.dry_run else 'LIVE IMPORT'}")
print(f"{'='*70}\n")
totals = {'files': 0, 'processed': 0, 'imported': 0, 'skipped_dup': 0, 'skipped_inv': 0, 'errors': 0}
for folder_name, sector_slug in sorted(FOLDER_TO_SECTOR_SLUG.items()):
folder_path = BASE_PATH / folder_name
if not folder_path.exists():
print(f"⚠ Folder not found: {folder_name}")
continue
sector = IndustrySector.objects.get(industry=industry, slug=sector_slug)
print(f"\n📂 Sector: {sector.name} (id={sector.id}, slug={sector.slug})")
csv_files = sorted(folder_path.glob('*.csv'))
if not csv_files:
print(f" ⚠ No CSV files")
continue
print(f" Found {len(csv_files)} CSV files")
with transaction.atomic():
for csv_file in csv_files:
totals['files'] += 1
stats = import_csv(csv_file, industry, sector, dry_run=args.dry_run)
totals['processed'] += stats['processed']
totals['imported'] += stats['imported']
totals['skipped_dup'] += stats['skipped_dup']
totals['skipped_inv'] += stats['skipped_inv']
totals['errors'] += stats['errors']
print(f" 📄 {csv_file.name}")
print(f" rows={stats['processed']} | ✓ imported={stats['imported']} | ⊘ dup={stats['skipped_dup']} | inv={stats['skipped_inv']}")
if args.dry_run:
transaction.set_rollback(True)
print(f"\n\n{'='*70}")
print(f"SUMMARY")
print(f"{'='*70}")
print(f"Total CSV files: {totals['files']}")
print(f"Total rows processed: {totals['processed']}")
print(f"✓ Imported: {totals['imported']}")
print(f"⊘ Skipped (duplicate): {totals['skipped_dup']}")
print(f"⊘ Skipped (invalid): {totals['skipped_inv']}")
print(f"✗ Errors: {totals['errors']}")
print(f"{'='*70}\n")
if args.dry_run:
print(" DRY RUN — no data saved. Remove --dry-run to import.\n")
else:
print("✓ Import completed!\n")
if __name__ == '__main__':
main()