Enhance content parsing and metadata extraction in HTMLContentRenderer and ToggleTableRow

- Improved HTMLContentRenderer to better handle JSON content: it now unwraps a `content` field from JSON-wrapped responses and sanitizes any extracted HTML before rendering.
- Updated ToggleTableRow to robustly extract meta descriptions, including parsing potential JSON strings.
- Refactored the Content page to conditionally display meta descriptions based on JSON parsing results, enhancing user experience.
2025-11-10 14:27:56 +00:00
parent 926ac150fd
commit e067dc759c
4 changed files with 87 additions and 28 deletions
--- a/frontend/src/components/common/HTMLContentRenderer.tsx
+++ b/frontend/src/components/common/HTMLContentRenderer.tsx
@@ -195,7 +195,47 @@ const HTMLContentRenderer: React.FC<HTMLContentRendererProps> = ({
    
    // If content is a string, try to parse as JSON first
    if (typeof content === 'string') {
-      // Try to parse as JSON (content outline from GPT-4o mini)
+      // Check if it's a JSON string that contains the actual content
+      if (content.trim().startsWith('{') || content.trim().startsWith('[')) {
+        try {
+          const parsed = JSON.parse(content);
+          if (typeof parsed === 'object' && parsed !== null) {
+            // If it's a full AI response JSON with a 'content' field, use that
+            if (parsed.content && typeof parsed.content === 'string') {
+              // Recursively process the extracted content
+              const extractedContent = parsed.content;
+              // Check if extracted content is HTML
+              if (isHTML(extractedContent)) {
+                const sanitized = sanitizeHTML(extractedContent);
+                if (sanitized.trim().startsWith('<article') || sanitized.trim().startsWith('<div')) {
+                  return `<div class="normalized-html-content">${sanitized}</div>`;
+                }
+                return `<div class="normalized-html-content"><article>${sanitized}</article></div>`;
+              }
+              // If extracted content is still JSON, try parsing again
+              if (extractedContent.trim().startsWith('{')) {
+                try {
+                  const nestedParsed = JSON.parse(extractedContent);
+                  if (nestedParsed.H2 || nestedParsed.H3 || nestedParsed.introduction || nestedParsed.sections) {
+                    return formatContentOutline(nestedParsed);
+                  }
+                } catch {
+                  // Not nested JSON, continue
+                }
+              }
+              // Use extracted content as-is (will be processed below)
+              content = extractedContent;
+            } else if (parsed.H2 || parsed.H3 || parsed.introduction || parsed.sections) {
+              // It's a content outline structure
+              return formatContentOutline(parsed);
+            }
+          }
+        } catch {
+          // Not valid JSON, continue with HTML/text processing
+        }
+      }
+      
+      // Try to parse as JSON (content outline from GPT-4o mini) - for non-brace-starting JSON
      try {
        const parsed = JSON.parse(content);
        if (typeof parsed === 'object' && (parsed.H2 || parsed.H3 || parsed.introduction || parsed.sections)) {