Files
igny8/scripts/extract_ai_elements.py
Gitea Deploy 961362e088 Add SEO fields to Tasks model, improve content generation response handling, and enhance progress bar animation
- Added primary_keyword, secondary_keywords, tags, and categories fields to Tasks model
- Updated generate_content function to handle full JSON response with all SEO fields
- Improved progress bar animation: smooth 1% increments every 300ms
- Enhanced step detection for content generation vs clustering vs ideas
- Fixed progress modal to show correct messages for each function type
- Added comprehensive logging to Keywords and Tasks pages for AI functions
- Fixed error handling to show meaningful error messages instead of generic failures
2025-11-09 21:22:34 +00:00

381 lines
13 KiB
Python

#!/usr/bin/env python3
"""
IGNY8 AI Data Mapping Script
Extracts complete reference table for all AI-related elements (functions, models, prompts, limits, calls)
to eliminate assumptions during restructuring.
Output: Markdown table with all AI Elements for cluster, idea, content, image
"""
import os
import re
import json
import ast
from pathlib import Path
from typing import Dict, List, Any, Optional
# Project root (assuming script is in scripts/ directory)
PROJECT_ROOT = Path(__file__).parent.parent
# Django backend package that contains the AI modules scanned below
BACKEND_ROOT = PROJECT_ROOT / "backend" / "igny8_core"
def extract_function_info(file_path: Path, function_name: str) -> Dict[str, Any]:
    """Extract AI-related metadata about a named function in a Python file.

    The file is parsed with :mod:`ast`; the named function's source segment is
    then scanned for known textual patterns (AIProcessor usage, Celery markers,
    progress/step tracking, prompt and model selection, validation and limits).

    Args:
        file_path: Python source file to inspect.
        function_name: Name of the ``def``/``async def`` to locate.

    Returns:
        Dict of findings; contains an ``"error"`` key when the file cannot be
        read or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        return {"error": f"Could not read file: {e}"}

    try:
        tree = ast.parse(content)
    except SyntaxError:
        return {"error": "Syntax error in file"}

    # Prefer a project-relative path; fall back to the absolute path so files
    # outside the tree don't raise ValueError.
    try:
        rel_file = str(file_path.relative_to(PROJECT_ROOT))
    except ValueError:
        rel_file = str(file_path)

    info: Dict[str, Any] = {
        "file": rel_file,
        "function_name": function_name,
        "found": False,
        "line_number": None,
        "uses_ai_processor": False,
        "uses_celery": False,
        "has_progress_callback": False,
        "has_request_steps": False,
        "has_response_steps": False,
        "prompt_source": "Unknown",
        "model_source": "Unknown",
        "validation_checks": [],
        "limit_checks": [],
    }

    for node in ast.walk(tree):
        # Match both sync and async definitions by name.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == function_name:
            info["found"] = True
            info["line_number"] = node.lineno
            func_content = ast.get_source_segment(content, node) or ""

            # Direct AIProcessor usage (class name or instance naming convention).
            if "AIProcessor" in func_content or "ai_processor" in func_content:
                info["uses_ai_processor"] = True

            # Celery task: self.request access inside the body, or a
            # shared_task decorator on the definition itself. (Replaces the
            # old `content[:node.lineno * 100]` slice, which treated the line
            # number as a character offset — an unreliable heuristic.)
            decorator_dump = " ".join(ast.dump(d) for d in node.decorator_list)
            if "self.request" in func_content or "shared_task" in decorator_dump:
                info["uses_celery"] = True

            # Progress / step tracking hooks.
            if "progress_callback" in func_content or "progress_tracker" in func_content:
                info["has_progress_callback"] = True
            if "request_steps" in func_content:
                info["has_request_steps"] = True
            if "response_steps" in func_content:
                info["has_response_steps"] = True

            # Prompt source, most specific pattern first.
            if "get_prompt_value" in func_content:
                info["prompt_source"] = "Database (get_prompt_value)"
            elif "get_default_prompt" in func_content:
                info["prompt_source"] = "Default (get_default_prompt)"
            elif "prompt_template" in func_content.lower():
                info["prompt_source"] = "Inline/Hardcoded"

            # Model selection strategy.
            if "default_model" in func_content or "self.default_model" in func_content:
                info["model_source"] = "AIProcessor.default_model"
            elif "get_model" in func_content:
                info["model_source"] = "Function.get_model()"
            elif "IntegrationSettings" in func_content:
                info["model_source"] = "IntegrationSettings.config['model']"

            # Validation and plan/credit limit checks.
            if "validate" in func_content.lower():
                info["validation_checks"].append("Has validate() call")
            if "check_credits" in func_content:
                info["limit_checks"].append("Credit check")
            if "daily_cluster_limit" in func_content or "max_clusters" in func_content:
                info["limit_checks"].append("Plan limits")
            break

    return info
def extract_class_info(file_path: Path, class_name: str) -> Dict[str, Any]:
    """Extract structural information about a named class in a Python file.

    Args:
        file_path: Python source file to inspect.
        class_name: Name of the class to locate.

    Returns:
        Dict with the class location, method names and base-class names;
        contains an ``"error"`` key when the file cannot be read or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        return {"error": f"Could not read file: {e}"}

    try:
        tree = ast.parse(content)
    except SyntaxError:
        return {"error": "Syntax error in file"}

    # Prefer a project-relative path; fall back to the absolute path so files
    # outside the tree don't raise ValueError.
    try:
        rel_file = str(file_path.relative_to(PROJECT_ROOT))
    except ValueError:
        rel_file = str(file_path)

    info: Dict[str, Any] = {
        "file": rel_file,
        "class_name": class_name,
        "found": False,
        "line_number": None,
        "methods": [],
        "inherits_from": [],
    }

    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef) and node.name == class_name:
            info["found"] = True
            info["line_number"] = node.lineno

            # Base classes: plain names plus dotted bases such as
            # `models.Model` (the old code silently skipped ast.Attribute).
            for base in node.bases:
                if isinstance(base, ast.Name):
                    info["inherits_from"].append(base.id)
                elif isinstance(base, ast.Attribute):
                    info["inherits_from"].append(ast.unparse(base))

            # Direct methods, including async ones.
            for item in node.body:
                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    info["methods"].append(item.name)
            break

    return info
def find_ai_functions() -> List[Dict[str, Any]]:
    """Analyse every known AI entry point (core function, task, method, class)."""
    # (name, file, type, category) for each AI element the report covers.
    specs = [
        ("_auto_cluster_keywords_core", BACKEND_ROOT / "modules" / "planner" / "tasks.py", "core_function", "cluster"),
        ("_generate_single_idea_core", BACKEND_ROOT / "modules" / "planner" / "tasks.py", "core_function", "ideas"),
        ("auto_generate_content_task", BACKEND_ROOT / "modules" / "writer" / "tasks.py", "celery_task", "content"),
        ("AutoClusterFunction", BACKEND_ROOT / "ai" / "functions" / "auto_cluster.py", "class", "cluster"),
        ("cluster_keywords", BACKEND_ROOT / "utils" / "ai_processor.py", "method", "cluster"),
        ("generate_ideas", BACKEND_ROOT / "utils" / "ai_processor.py", "method", "ideas"),
        ("generate_content", BACKEND_ROOT / "utils" / "ai_processor.py", "method", "content"),
        ("generate_image", BACKEND_ROOT / "utils" / "ai_processor.py", "method", "image"),
        ("run_ai_task", BACKEND_ROOT / "ai" / "tasks.py", "celery_task", "unified"),
        ("execute", BACKEND_ROOT / "ai" / "engine.py", "method", "unified"),
    ]

    results: List[Dict[str, Any]] = []
    for name, path, element_type, category in specs:
        # Skip elements whose source file is absent in this checkout.
        if not path.exists():
            continue
        extractor = extract_class_info if element_type == "class" else extract_function_info
        record = extractor(path, name)
        record.update({"type": element_type, "category": category})
        results.append(record)
    return results
def extract_prompt_info() -> List[Dict[str, Any]]:
    """Collect the prompt types hardcoded in get_default_prompt()."""
    utils_file = BACKEND_ROOT / "modules" / "system" / "utils.py"
    if not utils_file.exists():
        return []

    with open(utils_file, 'r', encoding='utf-8') as f:
        source = f.read()

    # Prompt types appear as dict keys mapped to triple-quoted templates.
    return [
        {
            "prompt_type": prompt_type,
            "source": "Hardcoded in get_default_prompt()",
            "file": "modules/system/utils.py",
            "retrieval": "get_prompt_value() -> AIPrompt model or default",
        }
        for prompt_type in re.findall(r"'(\w+)':\s*\"\"\"", source)
    ]
def extract_model_info() -> List[Dict[str, Any]]:
    """Collect the model names listed in the MODEL_RATES constant."""
    processor_file = BACKEND_ROOT / "utils" / "ai_processor.py"
    if not processor_file.exists():
        return []

    with open(processor_file, 'r', encoding='utf-8') as f:
        source = f.read()

    # Grab the body of the MODEL_RATES dict literal, then every quoted name in it.
    rates_match = re.search(r'MODEL_RATES\s*=\s*\{([^}]+)\}', source, re.DOTALL)
    if not rates_match:
        return []

    return [
        {
            "model_name": model_name,
            "source": "MODEL_RATES constant",
            "file": "utils/ai_processor.py",
            "selection": "AIProcessor._get_model() -> IntegrationSettings or Django settings",
        }
        for model_name in re.findall(r"'([^']+)'", rates_match.group(1))
    ]
def generate_markdown_table(functions: List[Dict], prompts: List[Dict], models: List[Dict]) -> str:
    """Render the extracted AI data as a markdown reference document.

    Args:
        functions: Records produced by find_ai_functions().
        prompts: Records produced by extract_prompt_info().
        models: Records produced by extract_model_info().

    Returns:
        The complete markdown document as a single string.
    """
    output = []
    output.append("# IGNY8 AI Elements Reference Table\n")
    output.append("Generated by extract_ai_elements.py\n")
    output.append("---\n\n")

    # Functions table
    output.append("## 🧠 AI Core Functions\n\n")
    output.append("| Function Name | Category | Type | File | Line | Uses AIProcessor | Celery | Progress | Steps | Prompt Source | Model Source |\n")
    output.append("|---------------|----------|------|------|------|------------------|--------|----------|-------|---------------|--------------|\n")
    for func in functions:
        if func.get("error"):
            continue
        name = func.get("function_name") or func.get("class_name", "N/A")
        category = func.get("category", "N/A")
        func_type = func.get("type", "N/A")
        file = func.get("file", "N/A")
        line = str(func.get("line_number", "N/A"))
        # Boolean flags rendered as check/cross marks. (The previous
        # `"" if ... else ""` conditionals had identical empty branches,
        # leaving the flag columns blank regardless of the value.)
        uses_ai = "✅" if func.get("uses_ai_processor") else "❌"
        celery = "✅" if func.get("uses_celery") else "❌"
        progress = "✅" if func.get("has_progress_callback") else "❌"
        steps = "✅" if (func.get("has_request_steps") or func.get("has_response_steps")) else "❌"
        prompt = func.get("prompt_source", "Unknown")
        model = func.get("model_source", "Unknown")
        output.append(f"| {name} | {category} | {func_type} | `{file}` | {line} | {uses_ai} | {celery} | {progress} | {steps} | {prompt} | {model} |\n")

    # Prompts table
    output.append("\n## 🧱 Prompt Sources\n\n")
    output.append("| Prompt Type | Source | File | Retrieval Method |\n")
    output.append("|-------------|--------|------|------------------|\n")
    for prompt in prompts:
        output.append(f"| {prompt['prompt_type']} | {prompt['source']} | `{prompt['file']}` | {prompt['retrieval']} |\n")

    # Models table
    output.append("\n## 🧾 Model Configuration\n\n")
    output.append("| Model Name | Source | File | Selection Method |\n")
    output.append("|------------|--------|------|------------------|\n")
    for model in models:
        output.append(f"| {model['model_name']} | {model['source']} | `{model['file']}` | {model['selection']} |\n")

    # Validation and Limits table (only functions that were actually found)
    output.append("\n## ⚠️ Validation & Limits\n\n")
    output.append("| Function | Validation Checks | Limit Checks |\n")
    output.append("|----------|-------------------|--------------|\n")
    for func in functions:
        if func.get("error") or not func.get("found"):
            continue
        name = func.get("function_name") or func.get("class_name", "N/A")
        validations = ", ".join(func.get("validation_checks", [])) or "None"
        limits = ", ".join(func.get("limit_checks", [])) or "None"
        output.append(f"| {name} | {validations} | {limits} |\n")

    return "".join(output)
def main():
    """Extract all AI elements, write the markdown report, and echo it."""
    print("🔍 Extracting AI elements from codebase...")

    functions = find_ai_functions()
    prompts = extract_prompt_info()
    models = extract_model_info()
    print(f"✅ Found {len(functions)} functions")
    print(f"✅ Found {len(prompts)} prompt types")
    print(f"✅ Found {len(models)} models")

    report = generate_markdown_table(functions, prompts, models)

    # Persist under docs/ActiveDocs, creating the directory tree on demand.
    destination = PROJECT_ROOT / "docs" / "ActiveDocs" / "AI-ELEMENTS-EXTRACTED.md"
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"\n✅ Table saved to: {destination.relative_to(PROJECT_ROOT)}")

    # Echo the full report to the console as well.
    separator = "=" * 80
    print("\n" + separator)
    print(report)
    print(separator)


if __name__ == "__main__":
    main()