Files
igny8/frontend/src/components/common/HTMLContentRenderer.tsx
2025-12-17 05:58:13 +00:00

375 lines
14 KiB
TypeScript

/**
* HTMLContentRenderer Component
* Safely renders HTML content with proper formatting and sanitization
*/
import React, { useMemo } from 'react';
import { sanitizeHTML, isHTML } from '../../utils/htmlSanitizer';
/**
 * Props accepted by HTMLContentRenderer.
 */
interface HTMLContentRendererProps {
  /**
   * Content to render. May be an HTML string, a plain-text outline, a JSON
   * string, or an already-parsed outline object (the component has a dedicated
   * branch for object values). Falsy values render a placeholder.
   */
  content: string | Record<string, unknown> | null | undefined;
  /** Extra class names appended to the wrapper element. */
  className?: string;
  /** When set, applied as CSS max-height and the wrapper becomes scrollable. */
  maxHeight?: string;
}
/**
 * Convert a parsed content outline (JSON structure) into an HTML string.
 *
 * Supported shapes, checked in this order:
 *  1. `{ content: "<html…>" }`       – the HTML is sanitized and returned as-is.
 *  2. `{ overview, outline }`        – overview paragraph, optional intro focus,
 *                                      then `outline.main_sections`.
 *  3. `{ introduction, sections, H2, H3 }` – any combination; `H3` is only
 *                                      rendered when `H2` is absent.
 *
 * All text values are passed through `escapeHTML`. Malformed entries (null
 * array items, non-string titles, object-valued headings) are skipped rather
 * than throwing or rendering "[object Object]".
 *
 * @param content Parsed outline value; falsy input yields an empty string.
 * @returns HTML markup wrapped in `<div class="content-outline">` (or the
 *          sanitized `content` field for shape 1).
 */
function formatContentOutline(content: any): string {
  if (!content) return '';

  // Shape 1: an object carrying ready-made HTML in its `content` field.
  if (typeof content === 'object' && 'content' in content) {
    const mainContent = content.content;
    if (typeof mainContent === 'string' && mainContent.trim().length > 0) {
      return sanitizeHTML(mainContent);
    }
  }

  const h2Prefix = /^H2:\s*/i;
  const h3Prefix = /^H3:\s*/i;

  // Only primitives have a meaningful text form; objects/arrays become ''.
  const textOf = (value: unknown): string => {
    if (typeof value === 'string') return value;
    if (typeof value === 'number' || typeof value === 'boolean') return String(value);
    return '';
  };

  // Wrap the escaped text of `value` in open/close markup, or emit nothing
  // when the value is falsy or has no text form.
  const element = (open: string, value: unknown, close: string): string => {
    const text = value ? textOf(value) : '';
    return text ? `${open}${escapeHTML(text)}${close}` : '';
  };

  // Strip a leading "H2:" / "H3:" label from a title when it is a string.
  const stripLabel = (value: unknown, prefix: RegExp): unknown =>
    typeof value === 'string' ? value.replace(prefix, '').trim() : value;

  const listOf = (value: unknown): any[] => (Array.isArray(value) ? value : []);

  let html = '<div class="content-outline">';

  // Shape 2: overview + outline structure.
  if (content.overview && content.outline) {
    html += '<div class="outline-intro">';
    html += `<div class="outline-paragraph"><strong>Overview:</strong> ${escapeHTML(textOf(content.overview))}</div>`;
    html += '</div>';

    html += element(
      '<div class="outline-section"><h3 class="section-heading">Introduction Focus</h3><div class="section-details">',
      content.outline.intro_focus,
      '</div></div>',
    );

    for (const section of listOf(content.outline.main_sections)) {
      if (!section) continue;
      html += '<div class="outline-section">';
      html += element('<h3 class="section-heading">', section.h2_topic, '</h3>');
      html += element('<div class="section-details">', section.coverage, '</div>');
      html += '</div>';
    }

    html += '</div>';
    return html;
  }

  // Introduction: either a plain string or an object with hook + paragraphs.
  const introduction = content.introduction;
  if (introduction) {
    html += '<div class="outline-intro">';
    if (typeof introduction === 'string') {
      html += element('<div class="outline-paragraph">', introduction, '</div>');
    } else if (typeof introduction === 'object') {
      html += element('<div class="outline-hook"><strong>Hook:</strong> ', introduction.hook, '</div>');
      listOf(introduction.paragraphs).forEach((para: any, index: number) => {
        if (!para) return;
        // Numbering follows the position in the source array.
        html += element(
          `<div class="outline-paragraph"><strong>Intro Paragraph ${index + 1}:</strong> `,
          para.details,
          '</div>',
        );
      });
    }
    html += '</div>';
  }

  // Nested `sections` array: each section has a title and string/array content.
  for (const section of listOf(content.sections)) {
    if (!section) continue;
    html += '<div class="outline-section">';
    if (section.title) {
      html += element('<h3 class="section-heading">', stripLabel(section.title, h2Prefix), '</h3>');
    }
    if (Array.isArray(section.content)) {
      // Array content: objects with an H3 title and their own content.
      for (const item of section.content) {
        if (!item) continue;
        if (item.title) {
          html += element('<h4 class="subsection-heading">', stripLabel(item.title, h3Prefix), '</h4>');
        }
        if (item.content) {
          html += `<div class="section-details">${escapeHTML(String(item.content))}</div>`;
        }
      }
    } else if (typeof section.content === 'string') {
      html += element('<div class="section-details">', section.content, '</div>');
    }
    html += '</div>';
  }

  // H2: array of section objects/strings, a single string, or a key/value map.
  const h2 = content.H2;
  if (Array.isArray(h2)) {
    for (const section of h2) {
      if (section == null) continue;
      const isPlainHeading = typeof section === 'string';
      const headingHTML = element(
        '<h3 class="section-heading">',
        isPlainHeading ? section : section.heading,
        '</h3>',
      );
      if (!headingHTML) continue;

      html += '<div class="outline-section">';
      html += headingHTML;
      if (!isPlainHeading) {
        if (typeof section.content_type === 'string' && section.content_type) {
          // Global regex: a string pattern would replace only the first underscore.
          const badge = section.content_type.replace(/_/g, ' ').toUpperCase();
          html += `<div class="content-type-badge">${escapeHTML(badge)}</div>`;
        }
        for (const subsection of listOf(section.subsections)) {
          if (subsection == null) continue;
          const label =
            typeof subsection === 'string' ? subsection : subsection.subheading || subsection.heading;
          html += element('<h4 class="subsection-heading">', label, '</h4>');
          html += element('<div class="section-details">', subsection.details, '</div>');
        }
        html += element('<div class="section-details">', section.details, '</div>');
      }
      html += '</div>';
    }
  } else if (typeof h2 === 'string') {
    html += element('<div class="outline-section"><h3 class="section-heading">', h2, '</h3></div>');
  } else if (h2 && typeof h2 === 'object') {
    // Key/value map: only the values are used as headings.
    for (const value of Object.values(h2)) {
      html += element('<div class="outline-section"><h3 class="section-heading">', value, '</h3></div>');
    }
  }

  // H3 as a direct property is only rendered when there is no H2.
  const h3 = content.H3;
  if (h3 && !h2) {
    html += '<div class="outline-section">';
    if (typeof h3 === 'string') {
      html += element('<h4 class="subsection-heading">', h3, '</h4>');
    } else if (typeof h3 === 'object') {
      for (const value of Object.values(h3)) {
        html += element('<h4 class="subsection-heading">', value, '</h4>');
      }
    }
    html += '</div>';
  }

  html += '</div>';
  return html;
}
/** Characters that must be replaced so text is inert inside HTML markup. */
const HTML_ESCAPE_MAP: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escape text for safe interpolation into HTML, preventing XSS.
 *
 * Implemented with string replacement rather than a temporary DOM node, so it
 * does not require a global `document` and allocates no element per call.
 * Quotes are escaped as well as `&`, `<` and `>`, which keeps the result safe
 * in attribute values; browsers decode the entities back to the same text.
 *
 * @param text Text to escape. `null`/`undefined` (possible at runtime with
 *             untyped JSON data) yield an empty string; other non-string
 *             values are stringified first.
 * @returns The escaped text.
 */
function escapeHTML(text: string): string {
  if (text == null) return '';
  return String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPE_MAP[ch] ?? ch);
}
/** Placeholder markup shown when there is nothing to render. */
const EMPTY_CONTENT_HTML = '<div class="text-gray-400 italic">No content available</div>';

/** Keys whose presence marks a parsed value as a content outline. */
const OUTLINE_KEYS = ['overview', 'outline', 'H2', 'H3', 'introduction', 'sections'] as const;

/** Matches a bullet line ("- text" or "* text"). */
const BULLET_LINE = /^[-*]\s+/;
/** Matches a bulleted H2 heading line ("- H2: Title"), case-insensitive. */
const H2_LINE = /^[-*]\s*H2[:]\s*/i;
/** Matches a bulleted H3 heading line ("- H3: Title"), case-insensitive. */
const H3_LINE = /^[-*]\s*H3[:]\s*/i;

/** True when `value` is a non-null object with at least one truthy outline key. */
function hasOutlineShape(value: unknown): boolean {
  if (typeof value !== 'object' || value === null) return false;
  const record = value as Record<string, unknown>;
  return OUTLINE_KEYS.some((key) => Boolean(record[key]));
}

/** Parse `text` as JSON, returning the result only if it is an object or array; otherwise null. */
function parseJSONContainer(text: string): Record<string, unknown> | null {
  const head = text.trim();
  if (!head.startsWith('{') && !head.startsWith('[')) return null;
  try {
    const parsed: unknown = JSON.parse(text);
    return typeof parsed === 'object' && parsed !== null
      ? (parsed as Record<string, unknown>)
      : null;
  } catch {
    // Not valid JSON; the caller falls back to HTML/plain-text handling.
    return null;
  }
}

/** Format an outline, returning null instead of throwing when the outline is malformed. */
function tryFormatOutline(outline: unknown): string | null {
  try {
    return formatContentOutline(outline);
  } catch {
    return null;
  }
}

/** Sanitize HTML and wrap it in the `normalized-html-content` container, adding an `<article>` when the markup has no article/div root. */
function wrapNormalizedHTML(html: string): string {
  const sanitized = sanitizeHTML(html);
  const head = sanitized.trim();
  if (head.startsWith('<article') || head.startsWith('<div')) {
    return `<div class="normalized-html-content">${sanitized}</div>`;
  }
  return `<div class="normalized-html-content"><article>${sanitized}</article></div>`;
}

/** Render plain text as an outline: bullets become list items, "- H2:"/"- H3:" lines become headings, other lines become paragraphs. */
function formatPlainTextOutline(text: string): string {
  let html = '<div class="content-outline">';
  let inList = false;
  const closeList = (): void => {
    if (inList) {
      html += '</ul>';
      inList = false;
    }
  };

  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) {
      closeList();
      html += '<br>';
      continue;
    }

    // Headings are tested before bullets: "- H2: Title" also matches the
    // bullet pattern and would otherwise never reach the heading branches.
    if (H2_LINE.test(trimmed)) {
      closeList();
      html += `<h3 class="section-heading">${escapeHTML(trimmed.replace(H2_LINE, ''))}</h3>`;
    } else if (H3_LINE.test(trimmed)) {
      closeList();
      html += `<h4 class="subsection-heading">${escapeHTML(trimmed.replace(H3_LINE, ''))}</h4>`;
    } else if (BULLET_LINE.test(trimmed)) {
      if (!inList) {
        html += '<ul class="outline-list">';
        inList = true;
      }
      // Indentation must be read from the raw line; the trimmed copy never
      // starts with whitespace.
      const isNested = line.startsWith(' ') || line.startsWith('\t');
      const itemClass = isNested ? 'outline-item nested' : 'outline-item';
      html += `<li class="${itemClass}">${escapeHTML(trimmed.replace(BULLET_LINE, ''))}</li>`;
    } else {
      closeList();
      html += `<p class="outline-paragraph">${escapeHTML(trimmed)}</p>`;
    }
  }

  closeList();
  html += '</div>';
  return html;
}

/** Render already-parsed object content: as an outline when it has any keys, otherwise as escaped JSON. */
function renderObjectContent(value: Record<string, unknown>): string {
  if (hasOutlineShape(value)) {
    return formatContentOutline(value);
  }
  try {
    // Unrecognised but non-empty objects are still attempted as outlines.
    if (Object.keys(value).length > 0) {
      return formatContentOutline(value);
    }
  } catch {
    // Fall through to the JSON dump below.
  }
  return escapeHTML(JSON.stringify(value, null, 2));
}

/** Render string content: JSON outline, JSON envelope with a `content` field, HTML, or plain text. */
function renderStringContent(source: string): string {
  let text = source;

  const parsed = parseJSONContainer(text);
  if (parsed) {
    const inner = parsed.content;
    if (typeof inner === 'string' && inner.length > 0) {
      // Full AI response envelope: the real payload lives in `content`.
      if (isHTML(inner)) return wrapNormalizedHTML(inner);
      text = inner;
      // The payload may itself be a JSON-encoded outline.
      const nested = parseJSONContainer(text);
      if (hasOutlineShape(nested)) {
        const formatted = tryFormatOutline(nested);
        if (formatted !== null) return formatted;
      }
    } else if (hasOutlineShape(parsed)) {
      const formatted = tryFormatOutline(parsed);
      if (formatted !== null) return formatted;
    }
  }

  // Normalized HTML from the backend: sanitize and wrap for styling.
  if (isHTML(text)) return wrapNormalizedHTML(text);

  return formatPlainTextOutline(text);
}

/**
 * Renders HTML, JSON outline, or plain-text content as sanitized markup.
 *
 * The content is converted to an HTML string once per `content` change
 * (memoized) and injected with `dangerouslySetInnerHTML`; every path either
 * goes through `sanitizeHTML` or escapes text with `escapeHTML`.
 *
 * Props:
 *  - `content`   – string or parsed object to render; falsy shows a placeholder.
 *  - `className` – extra classes appended to `html-content-renderer`.
 *  - `maxHeight` – optional CSS max-height; enables scrolling when set.
 */
const HTMLContentRenderer: React.FC<HTMLContentRendererProps> = ({
  content,
  className = '',
  maxHeight,
}) => {
  const renderedContent = useMemo(() => {
    // Treat the prop as unknown: callers may pass parsed objects at runtime.
    const raw: unknown = content;
    if (!raw) return EMPTY_CONTENT_HTML;
    if (typeof raw === 'object') return renderObjectContent(raw as Record<string, unknown>);
    if (typeof raw === 'string') return renderStringContent(raw);
    // Fallback for other primitives: show their string form.
    return escapeHTML(String(raw));
  }, [content]);

  return (
    <div
      className={`html-content-renderer ${className}`}
      style={maxHeight ? { maxHeight, overflow: 'auto' } : undefined}
      dangerouslySetInnerHTML={{ __html: renderedContent }}
    />
  );
};

export default HTMLContentRenderer;