#!/usr/bin/env python3
"""
IGNY8 AI Data Mapping Script

Extracts a complete reference table for all AI-related elements (functions,
models, prompts, limits, calls) to eliminate assumptions during restructuring.

Output: Markdown table with all AI elements for cluster, idea, content, image.
"""

import os
import re
import json
import ast
from pathlib import Path
from typing import Dict, List, Any, Optional

# Project root (assuming script is in scripts/ directory)
PROJECT_ROOT = Path(__file__).parent.parent
BACKEND_ROOT = PROJECT_ROOT / "backend" / "igny8_core"

# Status markers used in the generated Markdown tables.
_YES = "✅"
_NO = "❌"

# Both sync and async definitions count as "functions" for this report.
_FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

# Decorator name that marks a function as a Celery task.
_CELERY_DECORATOR = "shared_task"


def _display_path(file_path: Path) -> str:
    """Return *file_path* relative to PROJECT_ROOT, or unchanged if outside it."""
    try:
        return str(file_path.relative_to(PROJECT_ROOT))
    except ValueError:
        # relative_to() raises when the file is not under the project root.
        return str(file_path)


def _parse_python_file(file_path: Path):
    """Read and parse a Python file.

    Returns a ``(content, tree, error)`` triple. On success ``error`` is
    ``None``; on failure ``tree`` is ``None`` and ``error`` is the
    ``{"error": ...}`` dict the public extractors return to their callers.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        return None, None, {"error": f"Could not read file: {e}"}

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: some Python versions raise it for source with NUL bytes.
        return content, None, {"error": "Syntax error in file"}

    return content, tree, None


def _decorator_name(decorator: ast.expr) -> Optional[str]:
    """Return the trailing identifier of a decorator expression, if any.

    ``@name``, ``@pkg.name`` and the called forms ``@name(...)`` /
    ``@pkg.name(...)`` all yield ``"name"``.
    """
    target = decorator.func if isinstance(decorator, ast.Call) else decorator
    if isinstance(target, ast.Name):
        return target.id
    if isinstance(target, ast.Attribute):
        return target.attr
    return None


def extract_function_info(file_path: Path, function_name: str) -> Dict[str, Any]:
    """Extract information about a function from a Python file.

    Args:
        file_path: Python source file to inspect.
        function_name: Name of the (sync or async) function or method to find.

    Returns:
        ``{"error": ...}`` if the file cannot be read or parsed. Otherwise a
        dict describing the first matching definition; ``found`` is False and
        ``line_number`` is None when no definition has that name. The flags
        are substring heuristics over the function's source text, except the
        Celery flag, which also checks the function's own decorators.
    """
    content, tree, error = _parse_python_file(file_path)
    if error is not None:
        return error

    info = {
        "file": _display_path(file_path),
        "function_name": function_name,
        "found": False,
        "line_number": None,
        "uses_ai_processor": False,
        "uses_celery": False,
        "has_progress_callback": False,
        "has_request_steps": False,
        "has_response_steps": False,
        "prompt_source": "Unknown",
        "model_source": "Unknown",
        "validation_checks": [],
        "limit_checks": [],
    }

    # Search for function definition
    for node in ast.walk(tree):
        if not (isinstance(node, _FUNCTION_NODES) and node.name == function_name):
            continue

        info["found"] = True
        info["line_number"] = node.lineno

        # Source text of the definition (decorators are not included).
        func_content = ast.get_source_segment(content, node) or ""

        # Check for AIProcessor usage
        if "AIProcessor" in func_content or "ai_processor" in func_content:
            info["uses_ai_processor"] = True

        # Check for Celery: look at this function's decorators rather than at
        # an arbitrary prefix of the file, which flagged unrelated functions.
        is_task = any(
            _decorator_name(dec) == _CELERY_DECORATOR
            for dec in node.decorator_list
        )
        if is_task or "self.request" in func_content:
            info["uses_celery"] = True

        # Check for progress tracking
        if "progress_callback" in func_content or "progress_tracker" in func_content:
            info["has_progress_callback"] = True

        # Check for step tracking
        if "request_steps" in func_content:
            info["has_request_steps"] = True
        if "response_steps" in func_content:
            info["has_response_steps"] = True

        # Check for prompt sources
        if "get_prompt_value" in func_content:
            info["prompt_source"] = "Database (get_prompt_value)"
        elif "get_default_prompt" in func_content:
            info["prompt_source"] = "Default (get_default_prompt)"
        elif "prompt_template" in func_content.lower():
            info["prompt_source"] = "Inline/Hardcoded"

        # Check for model selection ("self.default_model" contains
        # "default_model", so one test covers both spellings).
        if "default_model" in func_content:
            info["model_source"] = "AIProcessor.default_model"
        elif "get_model" in func_content:
            info["model_source"] = "Function.get_model()"
        elif "IntegrationSettings" in func_content:
            info["model_source"] = "IntegrationSettings.config['model']"

        # Check for validation
        if "validate" in func_content.lower():
            info["validation_checks"].append("Has validate() call")
        if "check_credits" in func_content:
            info["limit_checks"].append("Credit check")
        if "daily_cluster_limit" in func_content or "max_clusters" in func_content:
            info["limit_checks"].append("Plan limits")

        # Only the first matching definition is reported.
        break

    return info


def extract_class_info(file_path: Path, class_name: str) -> Dict[str, Any]:
    """Extract information about a class from a Python file.

    Args:
        file_path: Python source file to inspect.
        class_name: Name of the class to find.

    Returns:
        ``{"error": ...}`` if the file cannot be read or parsed. Otherwise a
        dict with the class location, its base classes (dotted bases such as
        ``models.Model`` are reported as written) and the names of the sync
        and async methods defined directly in the class body.
    """
    content, tree, error = _parse_python_file(file_path)
    if error is not None:
        return error

    info = {
        "file": _display_path(file_path),
        "class_name": class_name,
        "found": False,
        "line_number": None,
        "methods": [],
        "inherits_from": [],
    }

    for node in ast.walk(tree):
        if not (isinstance(node, ast.ClassDef) and node.name == class_name):
            continue

        info["found"] = True
        info["line_number"] = node.lineno

        # Get base classes
        for base in node.bases:
            if isinstance(base, ast.Name):
                info["inherits_from"].append(base.id)
                continue
            # Dotted / subscripted bases: keep the text as written.
            base_text = ast.get_source_segment(content, base)
            if base_text:
                info["inherits_from"].append(base_text)

        # Get methods
        info["methods"].extend(
            item.name for item in node.body if isinstance(item, _FUNCTION_NODES)
        )

        # Only the first matching class is reported.
        break

    return info


def find_ai_functions() -> List[Dict[str, Any]]:
    """Find all AI-related functions in the codebase.

    Looks up a fixed list of known functions/classes under BACKEND_ROOT.
    Entries whose file does not exist are skipped. Each returned dict is the
    extractor result with the entry's ``type`` and ``category`` added.
    """
    functions = []

    # Define AI functions to search for
    ai_function_definitions = [
        {
            "name": "_auto_cluster_keywords_core",
            "file": BACKEND_ROOT / "modules" / "planner" / "tasks.py",
            "type": "core_function",
            "category": "cluster",
        },
        {
            "name": "_generate_single_idea_core",
            "file": BACKEND_ROOT / "modules" / "planner" / "tasks.py",
            "type": "core_function",
            "category": "ideas",
        },
        {
            "name": "auto_generate_content_task",
            "file": BACKEND_ROOT / "modules" / "writer" / "tasks.py",
            "type": "celery_task",
            "category": "content",
        },
        {
            "name": "AutoClusterFunction",
            "file": BACKEND_ROOT / "ai" / "functions" / "auto_cluster.py",
            "type": "class",
            "category": "cluster",
        },
        {
            "name": "cluster_keywords",
            "file": BACKEND_ROOT / "utils" / "ai_processor.py",
            "type": "method",
            "category": "cluster",
        },
        {
            "name": "generate_ideas",
            "file": BACKEND_ROOT / "utils" / "ai_processor.py",
            "type": "method",
            "category": "ideas",
        },
        {
            "name": "generate_content",
            "file": BACKEND_ROOT / "utils" / "ai_processor.py",
            "type": "method",
            "category": "content",
        },
        {
            "name": "generate_image",
            "file": BACKEND_ROOT / "utils" / "ai_processor.py",
            "type": "method",
            "category": "image",
        },
        {
            "name": "run_ai_task",
            "file": BACKEND_ROOT / "ai" / "tasks.py",
            "type": "celery_task",
            "category": "unified",
        },
        {
            "name": "execute",
            "file": BACKEND_ROOT / "ai" / "engine.py",
            "type": "method",
            "category": "unified",
        },
    ]

    for func_def in ai_function_definitions:
        file_path = func_def["file"]
        if not file_path.exists():
            continue

        if func_def["type"] == "class":
            info = extract_class_info(file_path, func_def["name"])
        else:
            info = extract_function_info(file_path, func_def["name"])

        info.update({
            "type": func_def["type"],
            "category": func_def["category"],
        })
        functions.append(info)

    return functions


def extract_prompt_info() -> List[Dict[str, Any]]:
    """Extract prompt information.

    Scans ``modules/system/utils.py`` under BACKEND_ROOT for single-quoted
    dict keys whose value starts with a triple-double-quoted string and
    reports each key as a prompt type. Returns an empty list if the file
    does not exist.
    """
    prompts = []

    utils_file = BACKEND_ROOT / "modules" / "system" / "utils.py"
    if not utils_file.exists():
        return prompts

    with open(utils_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find prompt types in get_default_prompt
    prompt_types = re.findall(r"'(\w+)':\s*\"\"\"", content)
    for prompt_type in prompt_types:
        prompts.append({
            "prompt_type": prompt_type,
            "source": "Hardcoded in get_default_prompt()",
            "file": "modules/system/utils.py",
            "retrieval": "get_prompt_value() -> AIPrompt model or default",
        })

    return prompts


def _model_names_from_source(content: str) -> List[str]:
    """Return the model names declared as keys of a ``MODEL_RATES`` dict.

    The source is parsed with ``ast`` so that only the top-level string keys
    of the dict literal are reported, even when the values are nested dicts.
    If the source cannot be parsed, or no ``MODEL_RATES = {...}`` literal is
    found, the function falls back to a regex scan that collects every
    single-quoted string up to the first closing brace.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        tree = None

    if tree is not None:
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                targets = node.targets
            elif isinstance(node, ast.AnnAssign):
                targets = [node.target]
            else:
                continue

            names_rates = any(
                isinstance(t, ast.Name) and t.id == "MODEL_RATES" for t in targets
            )
            if names_rates and isinstance(node.value, ast.Dict):
                # Keys are None for ``**spread`` entries; skip those.
                return [
                    key.value
                    for key in node.value.keys
                    if isinstance(key, ast.Constant) and isinstance(key.value, str)
                ]

    # Fallback: textual scan.
    model_rates_match = re.search(r'MODEL_RATES\s*=\s*\{([^}]+)\}', content, re.DOTALL)
    if not model_rates_match:
        return []
    return re.findall(r"'([^']+)'", model_rates_match.group(1))


def extract_model_info() -> List[Dict[str, Any]]:
    """Extract model configuration information.

    Reads ``utils/ai_processor.py`` under BACKEND_ROOT and reports one entry
    per model name found in its ``MODEL_RATES`` constant. Returns an empty
    list if the file does not exist or no names are found.
    """
    models = []

    processor_file = BACKEND_ROOT / "utils" / "ai_processor.py"
    if not processor_file.exists():
        return models

    with open(processor_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find MODEL_RATES
    for model in _model_names_from_source(content):
        models.append({
            "model_name": model,
            "source": "MODEL_RATES constant",
            "file": "utils/ai_processor.py",
            "selection": "AIProcessor._get_model() -> IntegrationSettings or Django settings",
        })

    return models


def _flag(value: Any) -> str:
    """Render a truthy/falsy value as a check or cross mark."""
    return _YES if value else _NO


def generate_markdown_table(functions: List[Dict], prompts: List[Dict], models: List[Dict]) -> str:
    """Generate markdown table from extracted data.

    Args:
        functions: Results from ``find_ai_functions()``; entries carrying an
            ``error`` key are omitted from every table.
        prompts: Results from ``extract_prompt_info()``.
        models: Results from ``extract_model_info()``.

    Returns:
        The Markdown document as one string with four sections: core
        functions, prompt sources, model configuration, and validation/limits
        (the last lists only functions that were found).
    """
    output = []

    output.append("# IGNY8 AI Elements Reference Table\n")
    output.append("Generated by extract_ai_elements.py\n")
    output.append("---\n\n")

    # Functions table
    output.append("## 🧠 AI Core Functions\n\n")
    output.append("| Function Name | Category | Type | File | Line | Uses AIProcessor | Celery | Progress | Steps | Prompt Source | Model Source |\n")
    output.append("|---------------|----------|------|------|------|------------------|--------|----------|-------|---------------|--------------|\n")

    for func in functions:
        if func.get("error"):
            continue

        name = func.get("function_name") or func.get("class_name", "N/A")
        category = func.get("category", "N/A")
        func_type = func.get("type", "N/A")
        file_label = func.get("file", "N/A")
        # line_number is present but None when the definition was not found;
        # show "N/A" rather than the string "None".
        line_number = func.get("line_number")
        line = "N/A" if line_number is None else str(line_number)
        uses_ai = _flag(func.get("uses_ai_processor"))
        celery = _flag(func.get("uses_celery"))
        progress = _flag(func.get("has_progress_callback"))
        steps = _flag(func.get("has_request_steps") or func.get("has_response_steps"))
        prompt = func.get("prompt_source", "Unknown")
        model = func.get("model_source", "Unknown")

        output.append(f"| {name} | {category} | {func_type} | `{file_label}` | {line} | {uses_ai} | {celery} | {progress} | {steps} | {prompt} | {model} |\n")

    # Prompts table
    output.append("\n## 🧱 Prompt Sources\n\n")
    output.append("| Prompt Type | Source | File | Retrieval Method |\n")
    output.append("|-------------|--------|------|------------------|\n")

    for prompt in prompts:
        output.append(f"| {prompt['prompt_type']} | {prompt['source']} | `{prompt['file']}` | {prompt['retrieval']} |\n")

    # Models table
    output.append("\n## 🧾 Model Configuration\n\n")
    output.append("| Model Name | Source | File | Selection Method |\n")
    output.append("|------------|--------|------|------------------|\n")

    for model in models:
        output.append(f"| {model['model_name']} | {model['source']} | `{model['file']}` | {model['selection']} |\n")

    # Validation and Limits
    output.append("\n## ⚠️ Validation & Limits\n\n")
    output.append("| Function | Validation Checks | Limit Checks |\n")
    output.append("|----------|-------------------|--------------|\n")

    for func in functions:
        if func.get("error") or not func.get("found"):
            continue

        name = func.get("function_name") or func.get("class_name", "N/A")
        validations = ", ".join(func.get("validation_checks", [])) or "None"
        limits = ", ".join(func.get("limit_checks", [])) or "None"

        output.append(f"| {name} | {validations} | {limits} |\n")

    return "".join(output)


def main():
    """Main execution: extract all elements, write the report, and echo it."""
    print("🔍 Extracting AI elements from codebase...")

    functions = find_ai_functions()
    prompts = extract_prompt_info()
    models = extract_model_info()

    print(f"✅ Found {len(functions)} functions")
    print(f"✅ Found {len(prompts)} prompt types")
    print(f"✅ Found {len(models)} models")

    # Generate markdown
    markdown = generate_markdown_table(functions, prompts, models)

    # Save to file
    output_file = PROJECT_ROOT / "docs" / "ActiveDocs" / "AI-ELEMENTS-EXTRACTED.md"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(markdown)

    print(f"\n✅ Table saved to: {output_file.relative_to(PROJECT_ROOT)}")

    # Also print to console
    print("\n" + "="*80)
    print(markdown)
    print("="*80)


if __name__ == "__main__":
    main()