381 lines
13 KiB
Python
381 lines
13 KiB
Python
#!/usr/bin/env python3
"""
IGNY8 AI Data Mapping Script

Extracts a complete reference table for all AI-related elements (functions,
models, prompts, limits, calls) to eliminate assumptions during restructuring.

Output: Markdown table with all AI elements for cluster, idea, content, and image.
"""
|
|
import os
|
|
import re
|
|
import json
|
|
import ast
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any, Optional
|
|
|
|
# Project root (assuming script is in scripts/ directory)
PROJECT_ROOT = Path(__file__).parent.parent
# Backend package that contains the AI modules scanned by this script.
BACKEND_ROOT = PROJECT_ROOT / "backend" / "igny8_core"
|
|
|
|
|
|
def extract_function_info(file_path: Path, function_name: str) -> Dict[str, Any]:
    """Extract information about a function from a Python file.

    Parses the file's AST, locates the first function (sync or async) named
    *function_name*, and scans its source segment for IGNY8-specific patterns:
    AIProcessor usage, Celery task markers, progress/step tracking, and
    prompt/model/validation/limit conventions.

    Returns a findings dict, or ``{"error": ...}`` when the file cannot be
    read or does not parse.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        return {"error": f"Could not read file: {e}"}

    # Try to parse AST
    try:
        tree = ast.parse(content)
    except SyntaxError:
        return {"error": "Syntax error in file"}

    info = {
        "file": str(file_path.relative_to(PROJECT_ROOT)),
        "function_name": function_name,
        "found": False,
        "line_number": None,
        "uses_ai_processor": False,
        "uses_celery": False,
        "has_progress_callback": False,
        "has_request_steps": False,
        "has_response_steps": False,
        "prompt_source": "Unknown",
        "model_source": "Unknown",
        "validation_checks": [],
        "limit_checks": [],
    }

    # Search for the function definition (async defs included)
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == function_name:
            info["found"] = True
            info["line_number"] = node.lineno

            # Exact source text of this function, used for substring checks
            func_content = ast.get_source_segment(content, node) or ""

            # Check for AIProcessor usage
            if "AIProcessor" in func_content or "ai_processor" in func_content:
                info["uses_ai_processor"] = True

            # Celery: either the body touches self.request, or the function is
            # decorated with @shared_task / @<app>.task. Inspecting the
            # decorator list is exact; the previous char-count slice of the
            # file (content[:lineno * 100]) was an unreliable heuristic.
            decorator_names = set()
            for dec in node.decorator_list:
                target = dec.func if isinstance(dec, ast.Call) else dec
                if isinstance(target, ast.Name):
                    decorator_names.add(target.id)
                elif isinstance(target, ast.Attribute):
                    decorator_names.add(target.attr)
            if "self.request" in func_content or "shared_task" in decorator_names or "task" in decorator_names:
                info["uses_celery"] = True

            # Check for progress tracking
            if "progress_callback" in func_content or "progress_tracker" in func_content:
                info["has_progress_callback"] = True

            # Check for step tracking
            if "request_steps" in func_content:
                info["has_request_steps"] = True
            if "response_steps" in func_content:
                info["has_response_steps"] = True

            # Check for prompt sources (first match wins)
            if "get_prompt_value" in func_content:
                info["prompt_source"] = "Database (get_prompt_value)"
            elif "get_default_prompt" in func_content:
                info["prompt_source"] = "Default (get_default_prompt)"
            elif "prompt_template" in func_content.lower():
                info["prompt_source"] = "Inline/Hardcoded"

            # Check for model selection (first match wins)
            if "default_model" in func_content or "self.default_model" in func_content:
                info["model_source"] = "AIProcessor.default_model"
            elif "get_model" in func_content:
                info["model_source"] = "Function.get_model()"
            elif "IntegrationSettings" in func_content:
                info["model_source"] = "IntegrationSettings.config['model']"

            # Check for validation
            if "validate" in func_content.lower():
                info["validation_checks"].append("Has validate() call")
            if "check_credits" in func_content:
                info["limit_checks"].append("Credit check")
            if "daily_cluster_limit" in func_content or "max_clusters" in func_content:
                info["limit_checks"].append("Plan limits")

            break

    return info
|
|
|
|
|
|
def extract_class_info(file_path: Path, class_name: str) -> Dict[str, Any]:
    """Extract information about a class from a Python file.

    Parses the file's AST and records where *class_name* is defined, what it
    inherits from, and the names of its (sync and async) methods.

    Returns a findings dict, or ``{"error": ...}`` when the file cannot be
    read or does not parse.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        return {"error": f"Could not read file: {e}"}

    try:
        tree = ast.parse(content)
    except SyntaxError:
        return {"error": "Syntax error in file"}

    info = {
        "file": str(file_path.relative_to(PROJECT_ROOT)),
        "class_name": class_name,
        "found": False,
        "line_number": None,
        "methods": [],
        "inherits_from": [],
    }

    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef) and node.name == class_name:
            info["found"] = True
            info["line_number"] = node.lineno

            # Get base classes; dotted bases like models.Model were
            # previously dropped, so record their attribute name too.
            for base in node.bases:
                if isinstance(base, ast.Name):
                    info["inherits_from"].append(base.id)
                elif isinstance(base, ast.Attribute):
                    info["inherits_from"].append(base.attr)

            # Get method names, including async methods
            for item in node.body:
                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    info["methods"].append(item.name)

            break

    return info
|
|
|
|
|
|
def find_ai_functions() -> List[Dict[str, Any]]:
    """Find all AI-related functions in the codebase."""
    # (name, path parts under BACKEND_ROOT, element type, category)
    ai_element_specs = [
        ("_auto_cluster_keywords_core", ("modules", "planner", "tasks.py"), "core_function", "cluster"),
        ("_generate_single_idea_core", ("modules", "planner", "tasks.py"), "core_function", "ideas"),
        ("auto_generate_content_task", ("modules", "writer", "tasks.py"), "celery_task", "content"),
        ("AutoClusterFunction", ("ai", "functions", "auto_cluster.py"), "class", "cluster"),
        ("cluster_keywords", ("utils", "ai_processor.py"), "method", "cluster"),
        ("generate_ideas", ("utils", "ai_processor.py"), "method", "ideas"),
        ("generate_content", ("utils", "ai_processor.py"), "method", "content"),
        ("generate_image", ("utils", "ai_processor.py"), "method", "image"),
        ("run_ai_task", ("ai", "tasks.py"), "celery_task", "unified"),
        ("execute", ("ai", "engine.py"), "method", "unified"),
    ]

    results: List[Dict[str, Any]] = []
    for name, parts, element_type, category in ai_element_specs:
        path = BACKEND_ROOT.joinpath(*parts)
        # Silently skip elements whose source file is absent in this checkout.
        if not path.exists():
            continue

        extractor = extract_class_info if element_type == "class" else extract_function_info
        record = extractor(path, name)
        record.update({"type": element_type, "category": category})
        results.append(record)

    return results
|
|
|
|
|
|
def extract_prompt_info() -> List[Dict[str, Any]]:
    """Extract prompt information."""
    utils_file = BACKEND_ROOT / "modules" / "system" / "utils.py"
    if not utils_file.exists():
        return []

    content = utils_file.read_text(encoding='utf-8')

    # Each triple-quoted entry inside get_default_prompt() is one prompt type.
    return [
        {
            "prompt_type": prompt_type,
            "source": "Hardcoded in get_default_prompt()",
            "file": "modules/system/utils.py",
            "retrieval": "get_prompt_value() -> AIPrompt model or default",
        }
        for prompt_type in re.findall(r"'(\w+)':\s*\"\"\"", content)
    ]
|
|
|
|
|
|
def extract_model_info() -> List[Dict[str, Any]]:
    """Extract model configuration information."""
    models: List[Dict[str, Any]] = []

    processor_file = BACKEND_ROOT / "utils" / "ai_processor.py"
    if processor_file.exists():
        content = processor_file.read_text(encoding='utf-8')

        # Locate the MODEL_RATES constant and pull every quoted model name
        # out of its body.
        rates_block = re.search(r'MODEL_RATES\s*=\s*\{([^}]+)\}', content, re.DOTALL)
        if rates_block:
            for model_name in re.findall(r"'([^']+)'", rates_block.group(1)):
                models.append({
                    "model_name": model_name,
                    "source": "MODEL_RATES constant",
                    "file": "utils/ai_processor.py",
                    "selection": "AIProcessor._get_model() -> IntegrationSettings or Django settings",
                })

    return models
|
|
|
|
|
|
def generate_markdown_table(functions: List[Dict], prompts: List[Dict], models: List[Dict]) -> str:
    """Generate markdown table from extracted data."""

    def yn(flag: Any) -> str:
        # Render a boolean-ish value as a markdown check / cross mark.
        return "✅" if flag else "❌"

    parts: List[str] = [
        "# IGNY8 AI Elements Reference Table\n",
        "Generated by extract_ai_elements.py\n",
        "---\n\n",
        # Functions table
        "## 🧠 AI Core Functions\n\n",
        "| Function Name | Category | Type | File | Line | Uses AIProcessor | Celery | Progress | Steps | Prompt Source | Model Source |\n",
        "|---------------|----------|------|------|------|------------------|--------|----------|-------|---------------|--------------|\n",
    ]

    for entry in functions:
        if entry.get("error"):
            continue

        cells = " | ".join([
            entry.get("function_name") or entry.get("class_name", "N/A"),
            entry.get("category", "N/A"),
            entry.get("type", "N/A"),
            f"`{entry.get('file', 'N/A')}`",
            str(entry.get("line_number", "N/A")),
            yn(entry.get("uses_ai_processor")),
            yn(entry.get("uses_celery")),
            yn(entry.get("has_progress_callback")),
            yn(entry.get("has_request_steps") or entry.get("has_response_steps")),
            entry.get("prompt_source", "Unknown"),
            entry.get("model_source", "Unknown"),
        ])
        parts.append(f"| {cells} |\n")

    # Prompts table
    parts.append("\n## 🧱 Prompt Sources\n\n")
    parts.append("| Prompt Type | Source | File | Retrieval Method |\n")
    parts.append("|-------------|--------|------|------------------|\n")
    for prompt in prompts:
        parts.append(f"| {prompt['prompt_type']} | {prompt['source']} | `{prompt['file']}` | {prompt['retrieval']} |\n")

    # Models table
    parts.append("\n## 🧾 Model Configuration\n\n")
    parts.append("| Model Name | Source | File | Selection Method |\n")
    parts.append("|------------|--------|------|------------------|\n")
    for model in models:
        parts.append(f"| {model['model_name']} | {model['source']} | `{model['file']}` | {model['selection']} |\n")

    # Validation and limits table (only for elements actually found)
    parts.append("\n## ⚠️ Validation & Limits\n\n")
    parts.append("| Function | Validation Checks | Limit Checks |\n")
    parts.append("|----------|-------------------|--------------|\n")
    for entry in functions:
        if entry.get("error") or not entry.get("found"):
            continue

        label = entry.get("function_name") or entry.get("class_name", "N/A")
        validations = ", ".join(entry.get("validation_checks", [])) or "None"
        limits = ", ".join(entry.get("limit_checks", [])) or "None"
        parts.append(f"| {label} | {validations} | {limits} |\n")

    return "".join(parts)
|
|
|
|
|
|
def main():
    """Main execution."""
    print("🔍 Extracting AI elements from codebase...")

    functions = find_ai_functions()
    prompts = extract_prompt_info()
    models = extract_model_info()

    print(f"✅ Found {len(functions)} functions")
    print(f"✅ Found {len(prompts)} prompt types")
    print(f"✅ Found {len(models)} models")

    # Render the report and write it under docs/ActiveDocs.
    markdown = generate_markdown_table(functions, prompts, models)

    report_path = PROJECT_ROOT / "docs" / "ActiveDocs" / "AI-ELEMENTS-EXTRACTED.md"
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(markdown, encoding='utf-8')

    print(f"\n✅ Table saved to: {report_path.relative_to(PROJECT_ROOT)}")

    # Echo the report to the console as well.
    divider = "=" * 80
    print("\n" + divider)
    print(markdown)
    print(divider)
|
|
|
|
|
|
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|
|