// igny8/frontend/src/components/common/HTMLContentRenderer.tsx

/**
 * HTMLContentRenderer Component
 * Safely renders HTML content with proper formatting and sanitization
 */

import React, { useMemo } from 'react';
import { sanitizeHTML, isHTML } from '../../utils/htmlSanitizer';

/**
 * Props accepted by HTMLContentRenderer.
 */
interface HTMLContentRendererProps {
  /**
   * Content to render. May be an HTML string, a JSON string, plain text, or an
   * already-parsed outline object. `null`, `undefined` and `''` render a
   * "No content available" placeholder.
   */
  content: string | Record<string, unknown> | null | undefined;
  /** Extra CSS class names appended to the wrapper element. */
  className?: string;
  /** CSS max-height for the wrapper; when set, overflow becomes scrollable. */
  maxHeight?: string;
}

/**
 * Format a parsed content outline (JSON structure) as an HTML string.
 *
 * Shapes are checked in this order:
 *  1. An object with a non-blank string `content` field: that field is run
 *     through `sanitizeHTML` and returned on its own (no wrapper is added).
 *  2. `overview` + `outline` (optional `intro_focus` and `main_sections`).
 *  3. Any combination of `introduction`, `sections`, `H2` and `H3`
 *     (`H3` is only rendered when `H2` is absent).
 *
 * Every piece of outline text is passed through `escapeHTML` before it is
 * interpolated into markup. Null entries inside arrays are skipped instead of
 * throwing.
 *
 * @param content - Parsed outline data of unknown shape.
 * @returns `''` for falsy input; otherwise the sanitized `content` field or
 *   markup wrapped in `<div class="content-outline">` (an empty wrapper when no
 *   known key is present).
 */
function formatContentOutline(content: any): string {
  if (!content) return '';

  // If the object carries a `content` field with text/HTML, use that directly.
  if (typeof content === 'object' && 'content' in content) {
    const mainContent = content.content;
    if (typeof mainContent === 'string' && mainContent.trim().length > 0) {
      return sanitizeHTML(mainContent);
    }
  }

  // New format: overview + outline. It is exclusive of the legacy keys below.
  if (content.overview && content.outline) {
    return `<div class="content-outline">${renderOverviewOutline(content)}</div>`;
  }

  return (
    '<div class="content-outline">' +
    renderOutlineIntroduction(content.introduction) +
    renderOutlineSections(content.sections) +
    renderOutlineH2(content.H2) +
    // A top-level H3 is only meaningful when there is no H2 to nest under.
    (content.H2 ? '' : renderOutlineH3(content.H3)) +
    '</div>'
  );
}

/** Render the `overview` + `outline` format: overview, intro focus, main sections. */
function renderOverviewOutline(content: any): string {
  const outline = content.outline;

  let html = '<div class="outline-intro">';
  html += `<div class="outline-paragraph"><strong>Overview:</strong> ${escapeHTML(content.overview)}</div>`;
  html += '</div>';

  if (outline.intro_focus) {
    html += '<div class="outline-section">';
    html += '<h3 class="section-heading">Introduction Focus</h3>';
    html += `<div class="section-details">${escapeHTML(outline.intro_focus)}</div>`;
    html += '</div>';
  }

  if (Array.isArray(outline.main_sections)) {
    outline.main_sections.forEach((section: any) => {
      if (!section) return;

      html += '<div class="outline-section">';
      if (section.h2_topic) {
        html += `<h3 class="section-heading">${escapeHTML(section.h2_topic)}</h3>`;
      }
      if (section.coverage) {
        html += `<div class="section-details">${escapeHTML(section.coverage)}</div>`;
      }
      html += '</div>';
    });
  }

  return html;
}

/** Render `introduction`, which is either a plain string or `{ hook, paragraphs }`. */
function renderOutlineIntroduction(introduction: any): string {
  if (!introduction) return '';

  let html = '<div class="outline-intro">';

  if (typeof introduction === 'string') {
    html += `<div class="outline-paragraph">${escapeHTML(introduction)}</div>`;
  } else if (typeof introduction === 'object') {
    if (introduction.hook) {
      html += `<div class="outline-hook"><strong>Hook:</strong> ${escapeHTML(introduction.hook)}</div>`;
    }
    if (Array.isArray(introduction.paragraphs)) {
      introduction.paragraphs.forEach((para: any, index: number) => {
        if (para && para.details) {
          html += `<div class="outline-paragraph"><strong>Intro Paragraph ${index + 1}:</strong> ${escapeHTML(para.details)}</div>`;
        }
      });
    }
  }

  html += '</div>';
  return html;
}

/** Render the nested `sections` array (titles may carry an "H2:" / "H3:" prefix). */
function renderOutlineSections(sections: any): string {
  if (!Array.isArray(sections)) return '';

  let html = '';
  sections.forEach((section: any) => {
    if (!section) return;

    html += '<div class="outline-section">';

    if (section.title) {
      // String() guards against non-string titles coming from untyped JSON.
      const titleText = String(section.title).replace(/^H2:\s*/i, '').trim();
      html += `<h3 class="section-heading">${escapeHTML(titleText)}</h3>`;
    }

    if (Array.isArray(section.content)) {
      // Array of { title (H3), content } items.
      section.content.forEach((item: any) => {
        if (!item) return;
        if (item.title) {
          const subTitleText = String(item.title).replace(/^H3:\s*/i, '').trim();
          html += `<h4 class="subsection-heading">${escapeHTML(subTitleText)}</h4>`;
        }
        if (item.content) {
          html += `<div class="section-details">${escapeHTML(String(item.content))}</div>`;
        }
      });
    } else if (typeof section.content === 'string' && section.content) {
      html += `<div class="section-details">${escapeHTML(section.content)}</div>`;
    }

    html += '</div>';
  });

  return html;
}

/** Render `H2`, which may be an array of sections/strings, a string, or a key-value object. */
function renderOutlineH2(h2: any): string {
  if (!h2) return '';

  let html = '';

  if (Array.isArray(h2)) {
    h2.forEach((section: any) => {
      const isText = typeof section === 'string';
      // Entries that are neither a string nor an object with a heading are skipped.
      if (!isText && !(section && section.heading)) return;

      html += '<div class="outline-section">';
      html += `<h3 class="section-heading">${escapeHTML(isText ? section : section.heading)}</h3>`;

      if (!isText) {
        if (section.content_type) {
          // Global replace: content types such as "how_to_guide" contain several underscores.
          const badge = String(section.content_type).replace(/_/g, ' ').toUpperCase();
          html += `<div class="content-type-badge">${escapeHTML(badge)}</div>`;
        }

        if (Array.isArray(section.subsections)) {
          section.subsections.forEach((subsection: any) => {
            if (!subsection) return;
            const subheading =
              typeof subsection === 'string'
                ? subsection
                : subsection.subheading || subsection.heading;
            // Objects without any heading get no <h4> rather than "[object Object]".
            if (subheading) {
              html += `<h4 class="subsection-heading">${escapeHTML(subheading)}</h4>`;
            }
            if (subsection.details) {
              html += `<div class="section-details">${escapeHTML(subsection.details)}</div>`;
            }
          });
        }

        if (section.details) {
          html += `<div class="section-details">${escapeHTML(section.details)}</div>`;
        }
      }

      html += '</div>';
    });
  } else if (typeof h2 === 'string') {
    // Simple format: just a string (GPT-4o mini sometimes returns this).
    html += '<div class="outline-section">';
    html += `<h3 class="section-heading">${escapeHTML(h2)}</h3>`;
    html += '</div>';
  } else if (typeof h2 === 'object') {
    // Simple key-value format (GPT-4o mini format): only the values are shown.
    Object.values(h2).forEach((value: any) => {
      html += '<div class="outline-section">';
      html += `<h3 class="section-heading">${escapeHTML(value)}</h3>`;
      html += '</div>';
    });
  }

  return html;
}

/** Render a top-level `H3` (string or key-value object) inside a single section wrapper. */
function renderOutlineH3(h3: any): string {
  if (!h3) return '';

  let html = '<div class="outline-section">';
  if (typeof h3 === 'string') {
    html += `<h4 class="subsection-heading">${escapeHTML(h3)}</h4>`;
  } else if (typeof h3 === 'object') {
    Object.values(h3).forEach((value: any) => {
      html += `<h4 class="subsection-heading">${escapeHTML(value)}</h4>`;
    });
  }
  html += '</div>';
  return html;
}

/**
 * Escape text for safe interpolation into HTML, to prevent XSS.
 *
 * Implemented with plain string replacement so it does not need a DOM
 * (`document`) and therefore also works during server-side rendering and in
 * node-based tests. Quotes are escaped as well, so the result is safe in
 * attribute values, not only in element text.
 *
 * @param text - Value to escape. `null`/`undefined` become `''`; any other
 *   non-string value is converted with `String()` first.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by HTML entities.
 */
function escapeHTML(text: unknown): string {
  if (text === null || text === undefined) return '';

  return String(text)
    // `&` must be handled first so the entities added below are not re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/** Type guard for any non-null object (arrays included). */
function isObjectLike(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** True when the object has at least one truthy key of a known outline format. */
function hasOutlineShape(value: Record<string, unknown>): boolean {
  return Boolean(
    value.overview || value.outline || value.H2 || value.H3 || value.introduction || value.sections
  );
}

/** Parse JSON, returning `undefined` (never a valid JSON value) when parsing fails. */
function tryParseJSON(raw: string): unknown {
  try {
    return JSON.parse(raw);
  } catch {
    return undefined;
  }
}

/** Run the outline formatter, returning `undefined` instead of throwing on malformed data. */
function tryFormatOutline(outline: Record<string, unknown>): string | undefined {
  try {
    return formatContentOutline(outline);
  } catch {
    return undefined;
  }
}

/** Sanitize an HTML string and wrap it for styling, adding `<article>` when it has no wrapper. */
function wrapNormalizedHTML(rawHtml: string): string {
  const sanitized = sanitizeHTML(rawHtml);
  const head = sanitized.trim();
  if (head.startsWith('<article') || head.startsWith('<div')) {
    return `<div class="normalized-html-content">${sanitized}</div>`;
  }
  return `<div class="normalized-html-content"><article>${sanitized}</article></div>`;
}

/**
 * Format plain text line by line: "- H2:" / "- H3:" lines become headings,
 * "-" / "*" lines become list items, blank lines become `<br>`, anything else
 * becomes a paragraph. All text is escaped.
 */
function formatPlainTextOutline(text: string): string {
  const h2Line = /^[-*]\s*H2[:]\s*/i;
  const h3Line = /^[-*]\s*H3[:]\s*/i;
  const bulletLine = /^[-*]\s+/;

  let html = '<div class="content-outline">';
  let inList = false;

  for (const line of text.split('\n')) {
    const trimmed = line.trim();

    // Headings are tested before bullets: "- H2: Title" also matches the bullet
    // pattern and would otherwise never be rendered as a heading.
    const isH2 = h2Line.test(trimmed);
    const isH3 = !isH2 && h3Line.test(trimmed);
    const isBullet = !isH2 && !isH3 && bulletLine.test(trimmed);

    // Anything other than a bullet ends the list that is currently open.
    if (inList && !isBullet) {
      html += '</ul>';
      inList = false;
    }

    if (!trimmed) {
      html += '<br>';
    } else if (isH2) {
      html += `<h3 class="section-heading">${escapeHTML(trimmed.replace(h2Line, ''))}</h3>`;
    } else if (isH3) {
      html += `<h4 class="subsection-heading">${escapeHTML(trimmed.replace(h3Line, ''))}</h4>`;
    } else if (isBullet) {
      if (!inList) {
        html += '<ul class="outline-list">';
        inList = true;
      }
      const itemText = trimmed.replace(bulletLine, '');
      // Indentation only exists on the untrimmed line.
      const isNested = line.startsWith('  ') || line.startsWith('\t');
      html += isNested
        ? `<li class="outline-item nested">${escapeHTML(itemText)}</li>`
        : `<li class="outline-item">${escapeHTML(itemText)}</li>`;
    } else {
      html += `<p class="outline-paragraph">${escapeHTML(trimmed)}</p>`;
    }
  }

  if (inList) {
    html += '</ul>';
  }
  html += '</div>';
  return html;
}

/**
 * Turn arbitrary content into the HTML string injected by HTMLContentRenderer.
 *
 * Resolution order:
 *  1. Falsy content -> "No content available" placeholder.
 *  2. Object content -> outline formatting, or escaped JSON for an empty
 *     object or when formatting throws.
 *  3. String content -> if it is JSON with a string `content` field, that field
 *     replaces the text; JSON outlines are formatted; HTML is sanitized and
 *     wrapped; everything else is formatted as plain text.
 *  4. Any other value -> escaped `String(content)`.
 */
function buildRenderedHTML(content: unknown): string {
  if (!content) return '<div class="text-gray-400 italic">No content available</div>';

  // Content is already an object (dict): format it directly.
  if (isObjectLike(content)) {
    if (hasOutlineShape(content) || Object.keys(content).length > 0) {
      const outline = tryFormatOutline(content);
      if (outline !== undefined) return outline;
    }
    return escapeHTML(JSON.stringify(content, null, 2));
  }

  if (typeof content === 'string') {
    // Local copy: the text may be replaced by an embedded `content` field below.
    let text = content;

    const parsed = tryParseJSON(text);
    if (isObjectLike(parsed)) {
      if (typeof parsed.content === 'string' && parsed.content) {
        // Full AI response JSON: the real payload lives in its `content` field.
        text = parsed.content;
        if (isHTML(text)) return wrapNormalizedHTML(text);

        // The extracted payload may itself be a JSON-encoded outline.
        const nested = tryParseJSON(text);
        if (isObjectLike(nested) && hasOutlineShape(nested)) {
          const outline = tryFormatOutline(nested);
          if (outline !== undefined) return outline;
        }
      } else if (hasOutlineShape(parsed)) {
        const outline = tryFormatOutline(parsed);
        if (outline !== undefined) return outline;
      }
    }

    // Normalized HTML from the backend: sanitize and wrap.
    if (isHTML(text)) return wrapNormalizedHTML(text);

    // Plain text (e.g. from GPT-4o): bullets, headings and line breaks.
    return formatPlainTextOutline(text);
  }

  // Fallback: convert to string.
  return escapeHTML(String(content));
}

/**
 * Renders HTML, JSON outline, or plain-text content inside a wrapper `<div>`.
 *
 * The markup is built by `buildRenderedHTML` (memoized on `content`) and
 * injected via `dangerouslySetInnerHTML`; HTML input goes through
 * `sanitizeHTML` and all other text through `escapeHTML` before injection.
 */
const HTMLContentRenderer: React.FC<HTMLContentRendererProps> = ({
  content,
  className = '',
  maxHeight,
}) => {
  const renderedContent = useMemo(() => buildRenderedHTML(content), [content]);

  return (
    <div
      className={`html-content-renderer ${className}`}
      style={maxHeight ? { maxHeight, overflow: 'auto' } : undefined}
      dangerouslySetInnerHTML={{ __html: renderedContent }}
    />
  );
};

export default HTMLContentRenderer;