Files
igny8/scripts/extract_ai_elements.py
2025-11-09 19:07:06 +05:00

381 lines
13 KiB
Python

#!/usr/bin/env python3
"""
IGNY8 AI Data Mapping Script
Extracts complete reference table for all AI-related elements (functions, models, prompts, limits, calls)
to eliminate assumptions during restructuring.
Output: Markdown table with all AI Elements for cluster, idea, content, image
"""
import os
import re
import json
import ast
from pathlib import Path
from typing import Dict, List, Any, Optional
# Repository root, derived from this script's location (assumes the script
# lives in <root>/scripts/).
PROJECT_ROOT = Path(__file__).parent.parent
# Backend package root that all the scanned AI modules live under.
BACKEND_ROOT = PROJECT_ROOT / "backend" / "igny8_core"
def _decorator_name(decorator) -> str:
    """Return the bare name of a decorator node (handles @name, @pkg.name, @name(...))."""
    target = decorator.func if isinstance(decorator, ast.Call) else decorator
    if isinstance(target, ast.Attribute):
        return target.attr
    if isinstance(target, ast.Name):
        return target.id
    return ""


def extract_function_info(file_path: Path, function_name: str) -> Dict[str, Any]:
    """Extract information about a function from a Python file.

    Parses the file with ``ast`` and inspects the first function (sync or
    async) named *function_name* for AI-related usage patterns: AIProcessor
    usage, Celery task markers, progress/step tracking, prompt and model
    sources, and validation/limit checks.

    Returns a dict of findings, or ``{"error": ...}`` if the file cannot be
    read or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        return {"error": f"Could not read file: {e}"}
    try:
        tree = ast.parse(content)
    except SyntaxError:
        return {"error": "Syntax error in file"}

    info = {
        "file": str(file_path.relative_to(PROJECT_ROOT)),
        "function_name": function_name,
        "found": False,
        "line_number": None,
        "uses_ai_processor": False,
        "uses_celery": False,
        "has_progress_callback": False,
        "has_request_steps": False,
        "has_response_steps": False,
        "prompt_source": "Unknown",
        "model_source": "Unknown",
        "validation_checks": [],
        "limit_checks": [],
    }

    for node in ast.walk(tree):
        # Match async defs too, so `async def` tasks are not silently skipped.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == function_name:
            info["found"] = True
            info["line_number"] = node.lineno
            # Source text of just this function, for cheap substring checks.
            func_content = ast.get_source_segment(content, node) or ""
            if "AIProcessor" in func_content or "ai_processor" in func_content:
                info["uses_ai_processor"] = True
            # Celery: either the body touches self.request, or the function is
            # decorated with @shared_task. (Previously approximated with the
            # slice `content[:node.lineno * 100]`, which confused line numbers
            # with character offsets and could match decorators of unrelated
            # functions.)
            is_shared_task = any(
                _decorator_name(dec) == "shared_task" for dec in node.decorator_list
            )
            if "self.request" in func_content or is_shared_task:
                info["uses_celery"] = True
            if "progress_callback" in func_content or "progress_tracker" in func_content:
                info["has_progress_callback"] = True
            if "request_steps" in func_content:
                info["has_request_steps"] = True
            if "response_steps" in func_content:
                info["has_response_steps"] = True
            # Prompt source, most specific marker first.
            if "get_prompt_value" in func_content:
                info["prompt_source"] = "Database (get_prompt_value)"
            elif "get_default_prompt" in func_content:
                info["prompt_source"] = "Default (get_default_prompt)"
            elif "prompt_template" in func_content.lower():
                info["prompt_source"] = "Inline/Hardcoded"
            # Model selection. ("self.default_model" contains "default_model",
            # so a single substring test covers both spellings.)
            if "default_model" in func_content:
                info["model_source"] = "AIProcessor.default_model"
            elif "get_model" in func_content:
                info["model_source"] = "Function.get_model()"
            elif "IntegrationSettings" in func_content:
                info["model_source"] = "IntegrationSettings.config['model']"
            # Validation and plan/credit limit checks.
            if "validate" in func_content.lower():
                info["validation_checks"].append("Has validate() call")
            if "check_credits" in func_content:
                info["limit_checks"].append("Credit check")
            if "daily_cluster_limit" in func_content or "max_clusters" in func_content:
                info["limit_checks"].append("Plan limits")
            break
    return info
def extract_class_info(file_path: Path, class_name: str) -> Dict[str, Any]:
    """Extract information about a class from a Python file.

    Parses the file with ``ast`` and reports the class's location, base
    classes, and directly-defined method names (sync and async). Dotted
    bases such as ``module.Base`` are recorded by their attribute name
    (previously only plain-name bases were captured).

    Returns a dict of findings, or ``{"error": ...}`` if the file cannot be
    read or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        return {"error": f"Could not read file: {e}"}
    try:
        tree = ast.parse(content)
    except SyntaxError:
        return {"error": "Syntax error in file"}

    info = {
        "file": str(file_path.relative_to(PROJECT_ROOT)),
        "class_name": class_name,
        "found": False,
        "line_number": None,
        "methods": [],
        "inherits_from": [],
    }

    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef) and node.name == class_name:
            info["found"] = True
            info["line_number"] = node.lineno
            # Record base classes, including dotted ones like pkg.Base.
            for base in node.bases:
                if isinstance(base, ast.Name):
                    info["inherits_from"].append(base.id)
                elif isinstance(base, ast.Attribute):
                    info["inherits_from"].append(base.attr)
            # Only direct members of the class body count as methods
            # (a nested walk would also pick up inner functions).
            for item in node.body:
                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    info["methods"].append(item.name)
            break
    return info
def find_ai_functions() -> List[Dict[str, Any]]:
    """Locate every known AI entry point and collect its metadata.

    Walks a hardcoded registry of AI-related elements (core functions,
    Celery tasks, AIProcessor methods, and function classes), skipping
    entries whose source file is missing on disk, and tags each result
    with its element type and feature category.
    """
    # Registry of AI entry points: (name, source file, element type, category).
    registry = [
        ("_auto_cluster_keywords_core",
         BACKEND_ROOT / "modules" / "planner" / "tasks.py", "core_function", "cluster"),
        ("_generate_single_idea_core",
         BACKEND_ROOT / "modules" / "planner" / "tasks.py", "core_function", "ideas"),
        ("auto_generate_content_task",
         BACKEND_ROOT / "modules" / "writer" / "tasks.py", "celery_task", "content"),
        ("AutoClusterFunction",
         BACKEND_ROOT / "ai" / "functions" / "auto_cluster.py", "class", "cluster"),
        ("cluster_keywords",
         BACKEND_ROOT / "utils" / "ai_processor.py", "method", "cluster"),
        ("generate_ideas",
         BACKEND_ROOT / "utils" / "ai_processor.py", "method", "ideas"),
        ("generate_content",
         BACKEND_ROOT / "utils" / "ai_processor.py", "method", "content"),
        ("generate_image",
         BACKEND_ROOT / "utils" / "ai_processor.py", "method", "image"),
        ("run_ai_task",
         BACKEND_ROOT / "ai" / "tasks.py", "celery_task", "unified"),
        ("execute",
         BACKEND_ROOT / "ai" / "engine.py", "method", "unified"),
    ]

    results: List[Dict[str, Any]] = []
    for name, path, element_type, category in registry:
        if not path.exists():
            # File moved or removed — skip rather than fail the whole scan.
            continue
        extractor = extract_class_info if element_type == "class" else extract_function_info
        record = extractor(path, name)
        record["type"] = element_type
        record["category"] = category
        results.append(record)
    return results
def extract_prompt_info() -> List[Dict[str, Any]]:
    """Collect the prompt types hardcoded in get_default_prompt().

    Scans modules/system/utils.py for dictionary entries of the form
    ``'<type>': \"\"\"...\"\"\"`` and reports one record per prompt type.
    Returns an empty list if the utils file is missing.
    """
    utils_file = BACKEND_ROOT / "modules" / "system" / "utils.py"
    if not utils_file.exists():
        return []
    with open(utils_file, 'r', encoding='utf-8') as f:
        source = f.read()
    # Each default prompt is keyed by a quoted name followed by a
    # triple-quoted template — capture just the key.
    return [
        {
            "prompt_type": prompt_type,
            "source": "Hardcoded in get_default_prompt()",
            "file": "modules/system/utils.py",
            "retrieval": "get_prompt_value() -> AIPrompt model or default",
        }
        for prompt_type in re.findall(r"'(\w+)':\s*\"\"\"", source)
    ]
def extract_model_info() -> List[Dict[str, Any]]:
    """Collect the model names declared in the MODEL_RATES constant.

    Scans utils/ai_processor.py for the MODEL_RATES dict literal and reports
    one record per single-quoted string found inside it. Returns an empty
    list if the processor file or the constant is missing.
    """
    processor_file = BACKEND_ROOT / "utils" / "ai_processor.py"
    if not processor_file.exists():
        return []
    with open(processor_file, 'r', encoding='utf-8') as f:
        source = f.read()
    # NOTE(review): this pattern stops at the first '}', so a MODEL_RATES
    # with nested dict values would be truncated, and the inner findall
    # captures every quoted string (values included, not just keys) —
    # verify against the actual shape of MODEL_RATES in ai_processor.py.
    match = re.search(r'MODEL_RATES\s*=\s*\{([^}]+)\}', source, re.DOTALL)
    if not match:
        return []
    return [
        {
            "model_name": model_name,
            "source": "MODEL_RATES constant",
            "file": "utils/ai_processor.py",
            "selection": "AIProcessor._get_model() -> IntegrationSettings or Django settings",
        }
        for model_name in re.findall(r"'([^']+)'", match.group(1))
    ]
def generate_markdown_table(functions: List[Dict], prompts: List[Dict], models: List[Dict]) -> str:
    """Generate the AI elements reference document as a Markdown string.

    Builds four tables — core functions, prompt sources, model configuration,
    and validation & limits — from the dicts produced by find_ai_functions(),
    extract_prompt_info() and extract_model_info().
    """
    def _flag(value) -> str:
        # Render a truthy flag as a visible table marker. (The previous code
        # evaluated `"" if value else ""` — both branches empty — so these
        # columns were always blank; the markers were evidently lost.)
        return "✅" if value else "❌"

    output = []
    output.append("# IGNY8 AI Elements Reference Table\n")
    output.append("Generated by extract_ai_elements.py\n")
    output.append("---\n\n")
    # Functions table
    output.append("## 🧠 AI Core Functions\n\n")
    output.append("| Function Name | Category | Type | File | Line | Uses AIProcessor | Celery | Progress | Steps | Prompt Source | Model Source |\n")
    output.append("|---------------|----------|------|------|------|------------------|--------|----------|-------|---------------|--------------|\n")
    for func in functions:
        if func.get("error"):
            continue  # unreadable/unparsable file — nothing to report
        name = func.get("function_name") or func.get("class_name", "N/A")
        category = func.get("category", "N/A")
        func_type = func.get("type", "N/A")
        file = func.get("file", "N/A")
        line = str(func.get("line_number", "N/A"))
        uses_ai = _flag(func.get("uses_ai_processor"))
        celery = _flag(func.get("uses_celery"))
        progress = _flag(func.get("has_progress_callback"))
        steps = _flag(func.get("has_request_steps") or func.get("has_response_steps"))
        prompt = func.get("prompt_source", "Unknown")
        model = func.get("model_source", "Unknown")
        output.append(f"| {name} | {category} | {func_type} | `{file}` | {line} | {uses_ai} | {celery} | {progress} | {steps} | {prompt} | {model} |\n")
    # Prompts table
    output.append("\n## 🧱 Prompt Sources\n\n")
    output.append("| Prompt Type | Source | File | Retrieval Method |\n")
    output.append("|-------------|--------|------|------------------|\n")
    for prompt in prompts:
        output.append(f"| {prompt['prompt_type']} | {prompt['source']} | `{prompt['file']}` | {prompt['retrieval']} |\n")
    # Models table
    output.append("\n## 🧾 Model Configuration\n\n")
    output.append("| Model Name | Source | File | Selection Method |\n")
    output.append("|------------|--------|------|------------------|\n")
    for model in models:
        output.append(f"| {model['model_name']} | {model['source']} | `{model['file']}` | {model['selection']} |\n")
    # Validation and limits table (only for functions actually located).
    output.append("\n## ⚠️ Validation & Limits\n\n")
    output.append("| Function | Validation Checks | Limit Checks |\n")
    output.append("|----------|-------------------|--------------|\n")
    for func in functions:
        if func.get("error") or not func.get("found"):
            continue
        name = func.get("function_name") or func.get("class_name", "N/A")
        validations = ", ".join(func.get("validation_checks", [])) or "None"
        limits = ", ".join(func.get("limit_checks", [])) or "None"
        output.append(f"| {name} | {validations} | {limits} |\n")
    return "".join(output)
def main():
    """Entry point: extract AI metadata and emit the Markdown reference table."""
    print("🔍 Extracting AI elements from codebase...")
    functions = find_ai_functions()
    prompts = extract_prompt_info()
    models = extract_model_info()
    print(f"✅ Found {len(functions)} functions")
    print(f"✅ Found {len(prompts)} prompt types")
    print(f"✅ Found {len(models)} models")
    markdown = generate_markdown_table(functions, prompts, models)
    # Persist the report under docs/ActiveDocs, creating the folder if needed.
    output_file = PROJECT_ROOT / "docs" / "ActiveDocs" / "AI-ELEMENTS-EXTRACTED.md"
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(markdown)
    print(f"\n✅ Table saved to: {output_file.relative_to(PROJECT_ROOT)}")
    # Echo the full report to the console as well.
    banner = "=" * 80
    print("\n" + banner)
    print(markdown)
    print(banner)


if __name__ == "__main__":
    main()